/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "tree-pass.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "tree-iterator.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "tree-ssanames.h"
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "symbol-summary.h"
#include "ipa-fnsummary.h"
#include "wide-int-bitmask.h"
#include "tree-vector-builder.h"
#include "dwarf2out.h"

/* This file should be included last.  */
#include "target-def.h"

#include "x86-tune-costs.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static bool ix86_save_reg (unsigned int, bool, bool);
static bool ix86_function_naked (const_tree);
static bool ix86_notrack_prefixed_insn_p (rtx);
static void ix86_emit_restore_reg_using_pop (rtx);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)     \
  ((mode) == QImode ? 0      \
   : (mode) == HImode ? 1    \
   : (mode) == SImode ? 2    \
   : (mode) == DImode ? 3    \
   : 4)
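/* Illustrative example (not part of the original sources): the per-mode
   cost arrays in struct processor_costs are indexed with MODE_INDEX, so
   the startup cost of an SImode multiply would be looked up as

     ix86_cost->mult_init[MODE_INDEX (SImode)]   (entry 2)

   with index 4 acting as the catch-all for wider modes.  */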
/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = NULL;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = NULL;

/* Processor feature/optimization bitmasks.  */
#define m_386 (HOST_WIDE_INT_1U<<PROCESSOR_I386)
#define m_486 (HOST_WIDE_INT_1U<<PROCESSOR_I486)
#define m_PENT (HOST_WIDE_INT_1U<<PROCESSOR_PENTIUM)
#define m_LAKEMONT (HOST_WIDE_INT_1U<<PROCESSOR_LAKEMONT)
#define m_PPRO (HOST_WIDE_INT_1U<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (HOST_WIDE_INT_1U<<PROCESSOR_PENTIUM4)
#define m_NOCONA (HOST_WIDE_INT_1U<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (HOST_WIDE_INT_1U<<PROCESSOR_CORE2)
#define m_NEHALEM (HOST_WIDE_INT_1U<<PROCESSOR_NEHALEM)
#define m_SANDYBRIDGE (HOST_WIDE_INT_1U<<PROCESSOR_SANDYBRIDGE)
#define m_HASWELL (HOST_WIDE_INT_1U<<PROCESSOR_HASWELL)
#define m_BONNELL (HOST_WIDE_INT_1U<<PROCESSOR_BONNELL)
#define m_SILVERMONT (HOST_WIDE_INT_1U<<PROCESSOR_SILVERMONT)
#define m_KNL (HOST_WIDE_INT_1U<<PROCESSOR_KNL)
#define m_KNM (HOST_WIDE_INT_1U<<PROCESSOR_KNM)
#define m_SKYLAKE (HOST_WIDE_INT_1U<<PROCESSOR_SKYLAKE)
#define m_SKYLAKE_AVX512 (HOST_WIDE_INT_1U<<PROCESSOR_SKYLAKE_AVX512)
#define m_CANNONLAKE (HOST_WIDE_INT_1U<<PROCESSOR_CANNONLAKE)
#define m_ICELAKE_CLIENT (HOST_WIDE_INT_1U<<PROCESSOR_ICELAKE_CLIENT)
#define m_ICELAKE_SERVER (HOST_WIDE_INT_1U<<PROCESSOR_ICELAKE_SERVER)
#define m_CASCADELAKE (HOST_WIDE_INT_1U<<PROCESSOR_CASCADELAKE)
#define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \
		       | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE)
#define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2)
#define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
#define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS)
#define m_TREMONT (HOST_WIDE_INT_1U<<PROCESSOR_TREMONT)
#define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL)

#define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE)
#define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (HOST_WIDE_INT_1U<<PROCESSOR_K8)
#define m_ATHLON (HOST_WIDE_INT_1U<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (HOST_WIDE_INT_1U<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER1)
#define m_BDVER2 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER2)
#define m_BDVER3 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER3)
#define m_BDVER4 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER4)
#define m_ZNVER1 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER1)
#define m_ZNVER2 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER2)
#define m_BTVER1 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER1)
#define m_BTVER2 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_ZNVER (m_ZNVER1 | m_ZNVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
			| m_ZNVER)

#define m_GENERIC (HOST_WIDE_INT_1U<<PROCESSOR_GENERIC)
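/* Illustrative example of how these masks are consumed (not an exact line
   from x86-tune.def): each DEF_TUNE entry supplies one of them as its
   SELECTOR argument, e.g.

     DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", m_PENT | m_CORE_ALL | m_GENERIC)

   and initial_ix86_tune_features below collects those selectors, so a
   feature is enabled whenever the bit for ix86_tune is set in the mask.  */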
const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
#undef DEF_TUNE
#define DEF_TUNE(tune, name, selector) name,
#include "x86-tune.def"
#undef DEF_TUNE
};

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned HOST_WIDE_INT initial_ix86_tune_features[X86_TUNE_LAST] = {
#undef DEF_TUNE
#define DEF_TUNE(tune, name, selector) selector,
#include "x86-tune.def"
#undef DEF_TUNE
};

/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned HOST_WIDE_INT initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer, flags, fpsr, frame */
  NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  /* Mask registers.  */
  ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 4, 5,
  /* fp regs */
  12, 13, 14, 15, 16, 17, 18, 19,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE registers */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX registers */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 1, 2, 3, 4, 5, 6, 7,
  /* fp regs */
  33, 34, 35, 36, 37, 38, 39, 40,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE registers */
  17, 18, 19, 20, 21, 22, 23, 24,
  /* MMX registers */
  41, 42, 43, 44, 45, 46, 47, 48,
  /* extended integer registers */
  8, 9, 10, 11, 12, 13, 14, 15,
  /* extended SSE registers */
  25, 26, 27, 28, 29, 30, 31, 32,
  /* AVX-512 registers 16-23 */
  67, 68, 69, 70, 71, 72, 73, 74,
  /* AVX-512 registers 24-31 */
  75, 76, 77, 78, 79, 80, 81, 82,
  /* mask registers */
  118, 119, 120, 121, 122, 123, 124, 125
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believed these numbers had these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 was so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I had does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seemed to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still printed garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB printed for various FP stack regs
   when doing an `x' command were all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 5, 4,
  /* fp regs */
  11, 12, 13, 14, 15, 16, 17, 18,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, 9,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE registers */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX registers */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};

/* Additional registers that are clobbered by SYSV calls.  */

#define NUM_X86_64_MS_CLOBBERED_REGS 12
static int const x86_64_ms_sysv_extra_clobbered_registers
		 [NUM_X86_64_MS_CLOBBERED_REGS] =
{
  SI_REG, DI_REG,
  XMM6_REG, XMM7_REG,
  XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
  XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
};
enum xlogue_stub {
  XLOGUE_STUB_SAVE,
  XLOGUE_STUB_RESTORE,
  XLOGUE_STUB_RESTORE_TAIL,
  XLOGUE_STUB_SAVE_HFP,
  XLOGUE_STUB_RESTORE_HFP,
  XLOGUE_STUB_RESTORE_HFP_TAIL,

  XLOGUE_STUB_COUNT
};

enum xlogue_stub_sets {
  XLOGUE_SET_ALIGNED,
  XLOGUE_SET_ALIGNED_PLUS_8,
  XLOGUE_SET_HFP_ALIGNED_OR_REALIGN,
  XLOGUE_SET_HFP_ALIGNED_PLUS_8,

  XLOGUE_SET_COUNT
};
/* Register save/restore layout used by out-of-line stubs.  */
class xlogue_layout {
public:
  struct reginfo
  {
    unsigned regno;
    HOST_WIDE_INT offset;	/* Offset used by stub base pointer (rax or
				   rsi) to where each register is stored.  */
  };

  unsigned get_nregs () const {return m_nregs;}
  HOST_WIDE_INT get_stack_align_off_in () const {return m_stack_align_off_in;}

  const reginfo &get_reginfo (unsigned reg) const
  {
    gcc_assert (reg < m_nregs);
    return m_regs[reg];
  }

  static const char *get_stub_name (enum xlogue_stub stub,
				    unsigned n_extra_regs);

  /* Returns an rtx for the stub's symbol based upon
       1.) the specified stub (save, restore or restore_ret) and
       2.) the value of cfun->machine->call_ms2sysv_extra_regs and
       3.) whether or not stack alignment is being performed.  */
  static rtx get_stub_rtx (enum xlogue_stub stub);

  /* Returns the amount of stack space (including padding) that the stub
     needs to store registers based upon data in the machine_function.  */
  HOST_WIDE_INT get_stack_space_used () const
  {
    const struct machine_function *m = cfun->machine;
    unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1;

    gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS);
    return m_regs[last_reg].offset + STUB_INDEX_OFFSET;
  }
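  /* Worked example (illustrative, not from the original sources): with no
     extra registers requested, last_reg is MIN_REGS - 1 == 11, which the
     REG_ORDER table below places at offset 0xb0 from the incoming stack
     pointer for the aligned layout.  m_regs[11].offset is therefore
     0xb0 - STUB_INDEX_OFFSET == 0x40, and the space used works out to
     0x40 + 0x70 == 0xb0 (176) bytes.  */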
  /* Returns the offset for the base pointer used by the stub.  */
  HOST_WIDE_INT get_stub_ptr_offset () const
  {
    return STUB_INDEX_OFFSET + m_stack_align_off_in;
  }

  static const struct xlogue_layout &get_instance ();
  static unsigned count_stub_managed_regs ();
  static bool is_stub_managed_reg (unsigned regno, unsigned count);

  static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70;
  static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS;
  static const unsigned MAX_REGS = 18;
  static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS;
  static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1;
  static const unsigned STUB_NAME_MAX_LEN = 20;
  static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT];
  static const unsigned REG_ORDER[MAX_REGS];
  static const unsigned REG_ORDER_REALIGN[MAX_REGS];

private:
  xlogue_layout ();
  xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp);
  xlogue_layout (const xlogue_layout &);

  /* True if hard frame pointer is used.  */
  bool m_hfp;

  /* Max number of registers this layout manages.  */
  unsigned m_nregs;

  /* Incoming offset from 16-byte alignment.  */
  HOST_WIDE_INT m_stack_align_off_in;

  /* Register order and offsets.  */
  struct reginfo m_regs[MAX_REGS];

  /* Lazy-inited cache of symbol names for stubs.  */
  static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
			  [STUB_NAME_MAX_LEN];

  static const xlogue_layout s_instances[XLOGUE_SET_COUNT];
};
const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
  "savms64",
  "resms64",
  "resms64x",
  "savms64f",
  "resms64f",
  "resms64fx"
};

const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
/* The below offset values are where each register is stored for the layout
   relative to incoming stack pointer.  The value of each m_regs[].offset will
   be relative to the incoming base pointer (rax or rsi) used by the stub.

    s_instances:   0          1            2              3
    Offset:                                realigned or   aligned + 8
    Register       aligned    aligned + 8  aligned w/HFP  w/HFP  */
    XMM15_REG,	/* 0x10       0x18         0x10           0x18  */
    XMM14_REG,	/* 0x20       0x28         0x20           0x28  */
    XMM13_REG,	/* 0x30       0x38         0x30           0x38  */
    XMM12_REG,	/* 0x40       0x48         0x40           0x48  */
    XMM11_REG,	/* 0x50       0x58         0x50           0x58  */
    XMM10_REG,	/* 0x60       0x68         0x60           0x68  */
    XMM9_REG,	/* 0x70       0x78         0x70           0x78  */
    XMM8_REG,	/* 0x80       0x88         0x80           0x88  */
    XMM7_REG,	/* 0x90       0x98         0x90           0x98  */
    XMM6_REG,	/* 0xa0       0xa8         0xa0           0xa8  */
    SI_REG,	/* 0xa8       0xb0         0xa8           0xb0  */
    DI_REG,	/* 0xb0       0xb8         0xb0           0xb8  */
    BX_REG,	/* 0xb8       0xc0         0xb8           0xc0  */
    BP_REG,	/* 0xc0       0xc8         N/A            N/A   */
    R12_REG,	/* 0xc8       0xd0         0xc0           0xc8  */
    R13_REG,	/* 0xd0       0xd8         0xc8           0xd0  */
    R14_REG,	/* 0xd8       0xe0         0xd0           0xd8  */
    R15_REG,	/* 0xe0       0xe8         0xd8           0xe0  */
};
/* Instantiate static const values.  */
const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
const unsigned xlogue_layout::MIN_REGS;
const unsigned xlogue_layout::MAX_REGS;
const unsigned xlogue_layout::MAX_EXTRA_REGS;
const unsigned xlogue_layout::VARIANT_COUNT;
const unsigned xlogue_layout::STUB_NAME_MAX_LEN;

/* Initialize xlogue_layout::s_stub_names to zero.  */
char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
				[STUB_NAME_MAX_LEN];

/* Instantiates all xlogue_layout instances.  */
const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
  xlogue_layout (0, false),
  xlogue_layout (8, false),
  xlogue_layout (0, true),
  xlogue_layout (8, true)
};
/* Return an appropriate const instance of xlogue_layout based upon values
   in cfun->machine and crtl.  */
const struct xlogue_layout &
xlogue_layout::get_instance ()
{
  enum xlogue_stub_sets stub_set;
  bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;

  if (stack_realign_fp)
    stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else if (frame_pointer_needed)
    stub_set = aligned_plus_8
	       ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
	       : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else
    stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;

  return s_instances[stub_set];
}
/* Determine how many clobbered registers can be saved by the stub.
   Returns the count of registers the stub will save and restore.  */
unsigned
xlogue_layout::count_stub_managed_regs ()
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i, count;
  unsigned regno;

  for (count = i = MIN_REGS; i < MAX_REGS; ++i)
    {
      regno = REG_ORDER[i];
      if (regno == BP_REG && hfp)
	continue;
      if (!ix86_save_reg (regno, false, false))
	break;
      ++count;
    }
  return count;
}

/* Determine if register REGNO is a stub managed register given the
   total COUNT of stub managed registers.  */
bool
xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i;

  for (i = 0; i < count; ++i)
    {
      gcc_assert (i < MAX_REGS);
      if (REG_ORDER[i] == BP_REG && hfp)
	++count;
      else if (REG_ORDER[i] == regno)
	return true;
    }
  return false;
}
/* Constructor for xlogue_layout.  */
xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
  : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
    m_stack_align_off_in (stack_align_off_in)
{
  HOST_WIDE_INT offset = stack_align_off_in;
  unsigned i, j;

  for (i = j = 0; i < MAX_REGS; ++i)
    {
      unsigned regno = REG_ORDER[i];

      if (regno == BP_REG && hfp)
	continue;
      if (SSE_REGNO_P (regno))
	{
	  offset += 16;
	  /* Verify that SSE regs are always aligned.  */
	  gcc_assert (!((stack_align_off_in + offset) & 15));
	}
      else
	offset += 8;

      m_regs[j].regno = regno;
      m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
    }
  gcc_assert (j == m_nregs);
}
/* Return the name of the stub, lazily constructing it on first use.  */
const char *
xlogue_layout::get_stub_name (enum xlogue_stub stub,
			      unsigned n_extra_regs)
{
  const int have_avx = TARGET_AVX;
  char *name = s_stub_names[!!have_avx][stub][n_extra_regs];

  /* Lazy init each string.  */
  if (!*name)
    {
      int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
			  (have_avx ? "avx" : "sse"),
			  STUB_BASE_NAMES[stub],
			  MIN_REGS + n_extra_regs);
      gcc_checking_assert (res < (int) STUB_NAME_MAX_LEN);
    }

  return name;
}
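/* Illustrative example (given the STUB_BASE_NAMES table above): for the
   plain save stub with no extra registers and without AVX, the name built
   here is "__sse_savms64_12"; with TARGET_AVX and five extra registers the
   restore stub becomes "__avx_resms64_17".  These names refer to the
   out-of-line MS-to-SysV stubs provided by libgcc.  */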
/* Return rtx of a symbol ref for the entry point (based upon
   cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
rtx
xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
{
  const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
  gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
  gcc_assert (stub < XLOGUE_STUB_COUNT);
  gcc_assert (crtl->stack_realign_finalized);

  return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
}

/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
static rtx (*ix86_gen_clzero) (rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (void);
static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (HOST_WIDE_INT, HOST_WIDE_INT, int, int,
				 const char *, const char *, enum fpmath_unit,
				 bool, bool);
static void ix86_function_specific_save (struct cl_target_option *,
					 struct gcc_options *opts);
static void ix86_function_specific_restore (struct gcc_options *opts,
					    struct cl_target_option *);
static void ix86_function_specific_post_stream_in (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *,
						 struct gcc_options *,
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* Whether -mtune= or -march= were specified.  */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (combined_fn, tree, tree);

static tree ix86_veclibabi_svml (combined_fn, tree, tree);
static tree ix86_veclibabi_acml (combined_fn, tree, tree);

/* This table must be in sync with enum processor_type in i386.h.  */
static const struct processor_costs *processor_cost_table[] =

/* Guarantee that the array is aligned with enum processor_type.  */
STATIC_ASSERT (ARRAY_SIZE (processor_cost_table) == PROCESSOR_max);
static int
rest_of_handle_insert_vzeroupper (void)
{
  int i;

  /* vzeroupper instructions are inserted immediately after reload to
     account for possible spills from 256bit or 512bit registers.  The pass
     reuses mode switching infrastructure by re-running mode insertion
     pass, so disable entities that have already been processed.  */
  for (i = 0; i < MAX_386_ENTITIES; i++)
    ix86_optimize_mode_switching[i] = 0;

  ix86_optimize_mode_switching[AVX_U128] = 1;

  /* Call optimize_mode_switching.  */
  g->get_passes ()->execute_pass_mode_switching ();
  return 0;
}
/* Return true if INSN uses or defines a hard register.
   Hard register uses in a memory address are ignored.
   Clobbers and flags definitions are ignored.  */

static bool
has_non_address_hard_reg (rtx_insn *insn)
{
  df_ref ref;
  FOR_EACH_INSN_DEF (ref, insn)
    if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
	&& !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
	&& DF_REF_REGNO (ref) != FLAGS_REG)
      return true;

  FOR_EACH_INSN_USE (ref, insn)
    if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
      return true;

  return false;
}
/* Check if comparison INSN may be transformed
   into vector comparison.  Currently we transform
   only zero checks which look like:

   (set (reg:CCZ 17 flags)
	(compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
			     (subreg:SI (reg:DI x) 0))
		     (const_int 0 [0])))  */

static bool
convertible_comparison_p (rtx_insn *insn)
{
  if (!TARGET_SSE4_1)
    return false;

  rtx def_set = single_set (insn);

  gcc_assert (def_set);

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  gcc_assert (GET_CODE (src) == COMPARE);

  if (GET_CODE (dst) != REG
      || REGNO (dst) != FLAGS_REG
      || GET_MODE (dst) != CCZmode)
    return false;

  rtx op1 = XEXP (src, 0);
  rtx op2 = XEXP (src, 1);

  if (op2 != CONST0_RTX (GET_MODE (op2)))
    return false;

  if (GET_CODE (op1) != IOR)
    return false;

  op2 = XEXP (op1, 1);
  op1 = XEXP (op1, 0);

  if (!SUBREG_P (op1)
      || !SUBREG_P (op2)
      || GET_MODE (op1) != SImode
      || GET_MODE (op2) != SImode
      || ((SUBREG_BYTE (op1) != 0
	   || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
	  && (SUBREG_BYTE (op2) != 0
	      || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
    return false;

  op1 = SUBREG_REG (op1);
  op2 = SUBREG_REG (op2);

  if (op1 != op2
      || !REG_P (op1)
      || GET_MODE (op1) != DImode)
    return false;

  return true;
}
/* The DImode version of scalar_to_vector_candidate_p.  */

static bool
dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
  rtx def_set = single_set (insn);

  if (!def_set)
    return false;

  if (has_non_address_hard_reg (insn))
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  if (GET_CODE (src) == COMPARE)
    return convertible_comparison_p (insn);

  /* We are interested in DImode promotion only.  */
  if ((GET_MODE (src) != DImode
       && !CONST_INT_P (src))
      || GET_MODE (dst) != DImode)
    return false;

  if (!REG_P (dst) && !MEM_P (dst))
    return false;

  switch (GET_CODE (src))
    {
    case ASHIFTRT:
      if (!TARGET_AVX512VL)
	return false;
      /* FALLTHRU */

    case ASHIFT:
    case LSHIFTRT:
      if (!REG_P (XEXP (src, 1))
	  && (!SUBREG_P (XEXP (src, 1))
	      || SUBREG_BYTE (XEXP (src, 1)) != 0
	      || !REG_P (SUBREG_REG (XEXP (src, 1))))
	  && (!CONST_INT_P (XEXP (src, 1))
	      || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)))
	return false;

      if (GET_MODE (XEXP (src, 1)) != QImode
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;
      break;

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
      if (!REG_P (XEXP (src, 1))
	  && !MEM_P (XEXP (src, 1))
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;

      if (GET_MODE (XEXP (src, 1)) != DImode
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;
      break;

    case NEG:
    case NOT:
      break;

    case REG:
      return true;

    case MEM:
    case CONST_INT:
      return REG_P (dst);

    default:
      return false;
    }

  if (!REG_P (XEXP (src, 0))
      && !MEM_P (XEXP (src, 0))
      && !CONST_INT_P (XEXP (src, 0))
      /* Check for andnot case.  */
      && (GET_CODE (src) != AND
	  || GET_CODE (XEXP (src, 0)) != NOT
	  || !REG_P (XEXP (XEXP (src, 0), 0))))
    return false;

  if (GET_MODE (XEXP (src, 0)) != DImode
      && !CONST_INT_P (XEXP (src, 0)))
    return false;

  return true;
}
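/* Illustrative example (not from the original sources): a typical DImode
   candidate on ia32 is

     (set (reg:DI 90) (plus:DI (reg:DI 91) (reg:DI 92)))

   which can later be rewritten as a V2DImode addition on the low halves,
   while an operation with no vector counterpart, such as a DImode
   division, falls through to the default case above and is rejected.  */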
/* The TImode version of scalar_to_vector_candidate_p.  */

static bool
timode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
  rtx def_set = single_set (insn);

  if (!def_set)
    return false;

  if (has_non_address_hard_reg (insn))
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  /* Only TImode loads and stores are allowed.  */
  if (GET_MODE (dst) != TImode)
    return false;

  if (MEM_P (dst))
    {
      /* Check for store.  Memory must be aligned or unaligned store
	 is optimal.  Only support store from register, standard SSE
	 constant or CONST_WIDE_INT generated from piecewise store.

	 ??? Verify performance impact before enabling CONST_INT for
	 __int128 store.  */
      if (misaligned_operand (dst, TImode)
	  && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	return false;

      switch (GET_CODE (src))
	{
	default:
	  return false;

	case REG:
	case CONST_WIDE_INT:
	  return true;

	case CONST_INT:
	  return standard_sse_constant_p (src, TImode);
	}
    }
  else if (MEM_P (src))
    {
      /* Check for load.  Memory must be aligned or unaligned load is
	 optimal.  */
      return (REG_P (dst)
	      && (!misaligned_operand (src, TImode)
		  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    }

  return false;
}
/* Return true if INSN may be converted into a vector
   instruction.  */

static bool
scalar_to_vector_candidate_p (rtx_insn *insn)
{
  if (TARGET_64BIT)
    return timode_scalar_to_vector_candidate_p (insn);
  else
    return dimode_scalar_to_vector_candidate_p (insn);
}
/* The DImode version of remove_non_convertible_regs.  */

static void
dimode_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx reg = SET_DEST (def_set);

      if (!REG_P (reg)
	  || bitmap_bit_p (regs, REGNO (reg))
	  || HARD_REGISTER_P (reg))
	continue;

      for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
	   def;
	   def = DF_REF_NEXT_REG (def))
	{
	  if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "r%d has non convertible definition in insn %d\n",
			 REGNO (reg), DF_REF_INSN_UID (def));

	      bitmap_set_bit (regs, REGNO (reg));
	      break;
	    }
	}
    }

  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }
    }

  BITMAP_FREE (regs);
}
/* For a register REGNO, scan instructions for its defs and uses.
   Put REGNO in REGS if a def or use isn't in CANDIDATES.  */

static void
timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
				   unsigned int regno)
{
  for (df_ref def = DF_REG_DEF_CHAIN (regno);
       def;
       def = DF_REF_NEXT_REG (def))
    {
      if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible def in insn %d\n",
		     regno, DF_REF_INSN_UID (def));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }

  for (df_ref ref = DF_REG_USE_CHAIN (regno);
       ref;
       ref = DF_REF_NEXT_REG (ref))
    {
      /* Debug instructions are skipped.  */
      if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
	  && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible use in insn %d\n",
		     regno, DF_REF_INSN_UID (ref));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }
}
/* The TImode version of remove_non_convertible_regs.  */

static void
timode_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx dest = SET_DEST (def_set);
      rtx src = SET_SRC (def_set);

      if ((!REG_P (dest)
	   || bitmap_bit_p (regs, REGNO (dest))
	   || HARD_REGISTER_P (dest))
	  && (!REG_P (src)
	      || bitmap_bit_p (regs, REGNO (src))
	      || HARD_REGISTER_P (src)))
	continue;

      if (REG_P (dest))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (dest));

      if (REG_P (src))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (src));
    }

  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }

      for (df_ref ref = DF_REG_USE_CHAIN (id);
	   ref;
	   ref = DF_REF_NEXT_REG (ref))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (ref));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
	  }
    }

  BITMAP_FREE (regs);
}
/* For a given bitmap of insn UIDs, scan all instructions and
   remove an insn from CANDIDATES if it has both convertible
   and non-convertible definitions.

   All insns in a bitmap are conversion candidates according to
   scalar_to_vector_candidate_p.  Currently it implies all insns
   are single_set.  */

static void
remove_non_convertible_regs (bitmap candidates)
{
  if (TARGET_64BIT)
    timode_remove_non_convertible_regs (candidates);
  else
    dimode_remove_non_convertible_regs (candidates);
}

class scalar_chain
{
 public:
  scalar_chain ();
  virtual ~scalar_chain ();

  static unsigned max_id;

  /* ID of a chain.  */
  unsigned int chain_id;
  /* A queue of instructions to be included into a chain.  */
  bitmap queue;
  /* Instructions included into a chain.  */
  bitmap insns;
  /* All registers defined by a chain.  */
  bitmap defs;
  /* Registers used in both vector and scalar modes.  */
  bitmap defs_conv;

  void build (bitmap candidates, unsigned insn_uid);
  virtual int compute_convert_gain () = 0;
  int convert ();

 protected:
  void add_to_queue (unsigned insn_uid);
  void emit_conversion_insns (rtx insns, rtx_insn *pos);

 private:
  void add_insn (bitmap candidates, unsigned insn_uid);
  void analyze_register_chain (bitmap candidates, df_ref ref);
  virtual void mark_dual_mode_def (df_ref def) = 0;
  virtual void convert_insn (rtx_insn *insn) = 0;
  virtual void convert_registers () = 0;
};
class dimode_scalar_chain : public scalar_chain
{
 public:
  int compute_convert_gain ();
 private:
  void mark_dual_mode_def (df_ref def);
  rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
  void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
  void convert_insn (rtx_insn *insn);
  void convert_op (rtx *op, rtx_insn *insn);
  void convert_reg (unsigned regno);
  void make_vector_copies (unsigned regno);
  void convert_registers ();
  int vector_const_cost (rtx exp);
};

class timode_scalar_chain : public scalar_chain
{
 public:
  /* Converting from TImode to V1TImode is always faster.  */
  int compute_convert_gain () { return 1; }

 private:
  void mark_dual_mode_def (df_ref def);
  void fix_debug_reg_uses (rtx reg);
  void convert_insn (rtx_insn *insn);
  /* We don't convert registers to a different size.  */
  void convert_registers () {}
};
unsigned scalar_chain::max_id = 0;

/* Initialize new chain.  */

scalar_chain::scalar_chain ()
{
  chain_id = ++max_id;

  if (dump_file)
    fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);

  bitmap_obstack_initialize (NULL);
  insns = BITMAP_ALLOC (NULL);
  defs = BITMAP_ALLOC (NULL);
  defs_conv = BITMAP_ALLOC (NULL);
  queue = NULL;
}
/* Free chain's data.  */

scalar_chain::~scalar_chain ()
{
  BITMAP_FREE (insns);
  BITMAP_FREE (defs);
  BITMAP_FREE (defs_conv);
  bitmap_obstack_release (NULL);
}

/* Add an instruction into the chain's queue.  */

void
scalar_chain::add_to_queue (unsigned insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid)
      || bitmap_bit_p (queue, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
	     insn_uid, chain_id);
  bitmap_set_bit (queue, insn_uid);
}
/* For DImode conversion, mark register defined by DEF as requiring
   conversion.  */

void
dimode_scalar_chain::mark_dual_mode_def (df_ref def)
{
  gcc_assert (DF_REF_REG_DEF_P (def));

  if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
    return;

  if (dump_file)
    fprintf (dump_file,
	     "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
	     DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);

  bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
}

/* For TImode conversion, it is unused.  */

void
timode_scalar_chain::mark_dual_mode_def (df_ref)
{
  gcc_unreachable ();
}
/* Check REF's chain to add new insns into a queue
   and find registers requiring conversion.  */

void
scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
{
  df_link *chain;

  gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
  add_to_queue (DF_REF_INSN_UID (ref));

  for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
    {
      unsigned uid = DF_REF_INSN_UID (chain->ref);

      if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
	continue;

      if (!DF_REF_REG_MEM_P (chain->ref))
	{
	  if (bitmap_bit_p (insns, uid))
	    continue;

	  if (bitmap_bit_p (candidates, uid))
	    {
	      add_to_queue (uid);
	      continue;
	    }
	}

      if (DF_REF_REG_DEF_P (chain->ref))
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (chain->ref);
	}
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (ref);
	}
    }
}
/* Add instruction into a chain.  */

void
scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);

  bitmap_set_bit (insns, insn_uid);

  rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
  rtx def_set = single_set (insn);
  if (def_set && REG_P (SET_DEST (def_set))
      && !HARD_REGISTER_P (SET_DEST (def_set)))
    bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));

  df_ref ref;
  df_ref def;
  for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!HARD_REGISTER_P (DF_REF_REG (ref)))
      for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
	   def;
	   def = DF_REF_NEXT_REG (def))
	analyze_register_chain (candidates, def);
  for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      analyze_register_chain (candidates, ref);
}
/* Build new chain starting from insn INSN_UID recursively
   adding all dependent uses and definitions.  */

void
scalar_chain::build (bitmap candidates, unsigned insn_uid)
{
  queue = BITMAP_ALLOC (NULL);
  bitmap_set_bit (queue, insn_uid);

  if (dump_file)
    fprintf (dump_file, "Building chain #%d...\n", chain_id);

  while (!bitmap_empty_p (queue))
    {
      insn_uid = bitmap_first_set_bit (queue);
      bitmap_clear_bit (queue, insn_uid);
      bitmap_clear_bit (candidates, insn_uid);
      add_insn (candidates, insn_uid);
    }

  if (dump_file)
    {
      fprintf (dump_file, "Collected chain #%d...\n", chain_id);
      fprintf (dump_file, "  insns: ");
      dump_bitmap (dump_file, insns);
      if (!bitmap_empty_p (defs_conv))
	{
	  bitmap_iterator bi;
	  unsigned id;
	  const char *comma = "";
	  fprintf (dump_file, "  defs to convert: ");
	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
	    {
	      fprintf (dump_file, "%sr%d", comma, id);
	      comma = ", ";
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (queue);
}
/* Return a cost of building a vector constant
   instead of using a scalar one.  */

int
dimode_scalar_chain::vector_const_cost (rtx exp)
{
  gcc_assert (CONST_INT_P (exp));

  if (standard_sse_constant_p (exp, V2DImode))
    return COSTS_N_INSNS (1);
  return ix86_cost->sse_load[1];
}
/* Compute a gain for chain conversion.  */

int
dimode_scalar_chain::compute_convert_gain ()
{
  bitmap_iterator bi;
  unsigned insn_uid;
  int gain = 0;
  int cost = 0;

  if (dump_file)
    fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    {
      rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx dst = SET_DEST (def_set);

      if (REG_P (src) && REG_P (dst))
	gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
      else if (REG_P (src) && MEM_P (dst))
	gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
      else if (MEM_P (src) && REG_P (dst))
	gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
      else if (GET_CODE (src) == ASHIFT
	       || GET_CODE (src) == ASHIFTRT
	       || GET_CODE (src) == LSHIFTRT)
	{
	  if (CONST_INT_P (XEXP (src, 0)))
	    gain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    {
	      gain += ix86_cost->shift_const;
	      if (INTVAL (XEXP (src, 1)) >= 32)
		gain -= COSTS_N_INSNS (1);
	    }
	  else
	    /* Additional gain for omitting two CMOVs.  */
	    gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
	}
      else if (GET_CODE (src) == PLUS
	       || GET_CODE (src) == MINUS
	       || GET_CODE (src) == IOR
	       || GET_CODE (src) == XOR
	       || GET_CODE (src) == AND)
	{
	  gain += ix86_cost->add;
	  /* Additional gain for andnot for targets without BMI.  */
	  if (GET_CODE (XEXP (src, 0)) == NOT
	      && !TARGET_BMI)
	    gain += 2 * ix86_cost->add;

	  if (CONST_INT_P (XEXP (src, 0)))
	    gain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    gain -= vector_const_cost (XEXP (src, 1));
	}
      else if (GET_CODE (src) == NEG
	       || GET_CODE (src) == NOT)
	gain += ix86_cost->add - COSTS_N_INSNS (1);
      else if (GET_CODE (src) == COMPARE)
	{
	  /* Assume comparison cost is the same.  */
	}
      else if (CONST_INT_P (src))
	{
	  if (REG_P (dst))
	    gain += COSTS_N_INSNS (2);
	  else if (MEM_P (dst))
	    gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
	  gain -= vector_const_cost (src);
	}
      else
	gcc_unreachable ();
    }

  if (dump_file)
    fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);

  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
    cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;

  if (dump_file)
    fprintf (dump_file, "  Registers conversion cost: %d\n", cost);

  gain -= cost;

  if (dump_file)
    fprintf (dump_file, "  Total gain: %d\n", gain);

  return gain;
}
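/* Worked example (illustrative, not from the original sources): a chain
   consisting of a single DImode register-to-register move on ia32 gains
   COSTS_N_INSNS (2) minus one xmm_move, since the scalar form needs two
   SImode moves.  If a register in defs_conv must then live in both
   worlds, each of its definitions charges back one mmxsse_to_integer
   cost, which the subtraction of COST above accounts for.  */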
/* Replace REG in X with a V2DI subreg of NEW_REG.  */

rtx
dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
{
  if (x == reg)
    return gen_rtx_SUBREG (V2DImode, new_reg, 0);

  const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
  int i, j;
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
						   reg, new_reg);
    }

  return x;
}

/* Replace REG in INSN with a V2DI subreg of NEW_REG.  */

void
dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
						  rtx reg, rtx new_reg)
{
  replace_with_subreg (single_set (insn), reg, new_reg);
}
/* Insert generated conversion instruction sequence INSNS
   after instruction AFTER.  New BB may be required in case
   instruction has EH region attached.  */

void
scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
{
  if (!control_flow_insn_p (after))
    {
      emit_insn_after (insns, after);
      return;
    }

  basic_block bb = BLOCK_FOR_INSN (after);
  edge e = find_fallthru_edge (bb->succs);
  gcc_assert (e);

  basic_block new_bb = split_edge (e);
  emit_insn_after (insns, BB_HEAD (new_bb));
}
/* Make vector copies for all definitions of register REGNO
   and replace its uses in the chain.  */

void
dimode_scalar_chain::make_vector_copies (unsigned regno)
{
  rtx reg = regno_reg_rtx[regno];
  rtx vreg = gen_reg_rtx (DImode);
  bool count_reg = false;
  df_ref ref;

  for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	df_ref use;

	/* Detect the count register of a shift instruction.  */
	for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
	  if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
	    {
	      rtx_insn *insn = DF_REF_INSN (use);
	      rtx def_set = single_set (insn);

	      gcc_assert (def_set);

	      rtx src = SET_SRC (def_set);

	      if ((GET_CODE (src) == ASHIFT
		   || GET_CODE (src) == ASHIFTRT
		   || GET_CODE (src) == LSHIFTRT)
		  && !CONST_INT_P (XEXP (src, 1))
		  && reg_or_subregno (XEXP (src, 1)) == regno)
		count_reg = true;
	    }

	start_sequence ();
	if (count_reg)
	  {
	    rtx qreg = gen_lowpart (QImode, reg);
	    rtx tmp = gen_reg_rtx (SImode);

	    if (TARGET_ZERO_EXTEND_WITH_AND
		&& optimize_function_for_speed_p (cfun))
	      {
		emit_move_insn (tmp, const0_rtx);
		emit_insn (gen_movstrictqi
			   (gen_lowpart (QImode, tmp), qreg));
	      }
	    else
	      emit_insn (gen_rtx_SET
			 (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));

	    if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
	      {
		rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
		emit_move_insn (slot, tmp);
		tmp = copy_rtx (slot);
	      }

	    emit_insn (gen_zero_extendsidi2 (vreg, tmp));
	  }
	else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
	  {
	    rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
	    emit_move_insn (adjust_address (tmp, SImode, 0),
			    gen_rtx_SUBREG (SImode, reg, 0));
	    emit_move_insn (adjust_address (tmp, SImode, 4),
			    gen_rtx_SUBREG (SImode, reg, 4));
	    emit_move_insn (vreg, tmp);
	  }
	else if (TARGET_SSE4_1)
	  {
	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
					CONST0_RTX (V4SImode),
					gen_rtx_SUBREG (SImode, reg, 0)));
	    emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
					  gen_rtx_SUBREG (V4SImode, vreg, 0),
					  gen_rtx_SUBREG (SImode, reg, 4),
					  GEN_INT (2)));
	  }
	else
	  {
	    rtx tmp = gen_reg_rtx (DImode);
	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
					CONST0_RTX (V4SImode),
					gen_rtx_SUBREG (SImode, reg, 0)));
	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
					CONST0_RTX (V4SImode),
					gen_rtx_SUBREG (SImode, reg, 4)));
	    emit_insn (gen_vec_interleave_lowv4si
		       (gen_rtx_SUBREG (V4SImode, vreg, 0),
			gen_rtx_SUBREG (V4SImode, vreg, 0),
			gen_rtx_SUBREG (V4SImode, tmp, 0)));
	  }
	rtx_insn *seq = get_insns ();
	end_sequence ();
	rtx_insn *insn = DF_REF_INSN (ref);
	emit_conversion_insns (seq, insn);

	if (dump_file)
	  fprintf (dump_file,
		   "  Copied r%d to a vector register r%d for insn %d\n",
		   regno, REGNO (vreg), INSN_UID (insn));
      }

  for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	rtx_insn *insn = DF_REF_INSN (ref);
	if (count_reg)
	  {
	    rtx def_set = single_set (insn);
	    gcc_assert (def_set);

	    rtx src = SET_SRC (def_set);

	    if ((GET_CODE (src) == ASHIFT
		 || GET_CODE (src) == ASHIFTRT
		 || GET_CODE (src) == LSHIFTRT)
		&& !CONST_INT_P (XEXP (src, 1))
		&& reg_or_subregno (XEXP (src, 1)) == regno)
	      XEXP (src, 1) = vreg;
	  }
	else
	  replace_with_subreg_in_insn (insn, reg, vreg);

	if (dump_file)
	  fprintf (dump_file, "  Replaced r%d with r%d in insn %d\n",
		   regno, REGNO (vreg), INSN_UID (insn));
      }
}
/* Convert all definitions of register REGNO
   and fix its uses.  Scalar copies may be created
   if the register is used in a non-convertible insn.  */

void
dimode_scalar_chain::convert_reg (unsigned regno)
{
  bool scalar_copy = bitmap_bit_p (defs_conv, regno);
  rtx reg = regno_reg_rtx[regno];
  rtx scopy = NULL_RTX;
  df_ref ref;
  bitmap conv;

  conv = BITMAP_ALLOC (NULL);
  bitmap_copy (conv, insns);

  if (scalar_copy)
    scopy = gen_reg_rtx (DImode);

  for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx reg = DF_REF_REG (ref);

      if (!MEM_P (src))
	{
	  replace_with_subreg_in_insn (insn, reg, reg);
	  bitmap_clear_bit (conv, INSN_UID (insn));
	}

      if (scalar_copy)
	{
	  start_sequence ();
	  if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
	    {
	      rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
	      emit_move_insn (tmp, reg);
	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
			      adjust_address (tmp, SImode, 0));
	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
			      adjust_address (tmp, SImode, 4));
	    }
	  else if (TARGET_SSE4_1)
	    {
	      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	      emit_insn
		(gen_rtx_SET
		 (gen_rtx_SUBREG (SImode, scopy, 0),
		  gen_rtx_VEC_SELECT (SImode,
				      gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));

	      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
	      emit_insn
		(gen_rtx_SET
		 (gen_rtx_SUBREG (SImode, scopy, 4),
		  gen_rtx_VEC_SELECT (SImode,
				      gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
	    }
	  else
	    {
	      rtx vcopy = gen_reg_rtx (V2DImode);
	      emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
			      gen_rtx_SUBREG (SImode, vcopy, 0));
	      emit_move_insn (vcopy,
			      gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
			      gen_rtx_SUBREG (SImode, vcopy, 0));
	    }
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_conversion_insns (seq, insn);

	  if (dump_file)
	    fprintf (dump_file,
		     "  Copied r%d to a scalar register r%d for insn %d\n",
		     regno, REGNO (scopy), INSN_UID (insn));
	}
    }

  for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
	  {
	    rtx_insn *insn = DF_REF_INSN (ref);

	    rtx def_set = single_set (insn);
	    gcc_assert (def_set);

	    rtx src = SET_SRC (def_set);
	    rtx dst = SET_DEST (def_set);

	    if ((GET_CODE (src) == ASHIFT
		 || GET_CODE (src) == ASHIFTRT
		 || GET_CODE (src) == LSHIFTRT)
		&& !CONST_INT_P (XEXP (src, 1))
		&& reg_or_subregno (XEXP (src, 1)) == regno)
	      {
		rtx tmp2 = gen_reg_rtx (V2DImode);

		start_sequence ();

		if (TARGET_SSE4_1)
		  emit_insn (gen_sse4_1_zero_extendv2qiv2di2
			     (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
		else
		  {
		    rtx vec_cst
		      = gen_rtx_CONST_VECTOR (V2DImode,
					      gen_rtvec (2, GEN_INT (0xff),
							 const0_rtx));
		    vec_cst
		      = validize_mem (force_const_mem (V2DImode, vec_cst));

		    emit_insn (gen_rtx_SET
			       (tmp2,
				gen_rtx_AND (V2DImode,
					     gen_rtx_SUBREG (V2DImode, reg, 0),
					     vec_cst)));
		  }
		rtx_insn *seq = get_insns ();
		end_sequence ();

		emit_insn_before (seq, insn);

		XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
	      }
	    else if (!MEM_P (dst) || !REG_P (src))
	      replace_with_subreg_in_insn (insn, reg, reg);

	    bitmap_clear_bit (conv, INSN_UID (insn));
	  }
      }
    /* Skip debug insns and uninitialized uses.  */
    else if (DF_REF_CHAIN (ref)
	     && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
      {
	gcc_assert (scopy);
	replace_rtx (DF_REF_INSN (ref), reg, scopy);
	df_insn_rescan (DF_REF_INSN (ref));
      }

  BITMAP_FREE (conv);
}
/* Convert operand OP in INSN.  We should handle
   memory operands and uninitialized registers.
   All other register uses are converted during
   register conversion.  */

void
dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
  *op = copy_rtx_if_shared (*op);

  if (GET_CODE (*op) == NOT)
    {
      convert_op (&XEXP (*op, 0), insn);
      PUT_MODE (*op, V2DImode);
    }
  else if (MEM_P (*op))
    {
      rtx tmp = gen_reg_rtx (DImode);

      emit_insn_before (gen_move_insn (tmp, *op), insn);
      *op = gen_rtx_SUBREG (V2DImode, tmp, 0);

      if (dump_file)
	fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
		 INSN_UID (insn), REGNO (tmp));
    }
  else if (REG_P (*op))
    {
      /* Register uses may be left unconverted if the register has no
	 definition; otherwise they are converted in convert_reg.  */
      df_ref ref;
      FOR_EACH_INSN_USE (ref, insn)
	if (DF_REF_REGNO (ref) == REGNO (*op))
	  {
	    gcc_assert (!DF_REF_CHAIN (ref));
	    break;
	  }
      *op = gen_rtx_SUBREG (V2DImode, *op, 0);
    }
  else if (CONST_INT_P (*op))
    {
      rtx vec_cst;
      rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0);

      /* Prefer all ones vector in case of -1.  */
      if (constm1_operand (*op, GET_MODE (*op)))
	vec_cst = CONSTM1_RTX (V2DImode);
      else
	vec_cst = gen_rtx_CONST_VECTOR (V2DImode,
					gen_rtvec (2, *op, const0_rtx));

      if (!standard_sse_constant_p (vec_cst, V2DImode))
	{
	  start_sequence ();
	  vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}

      emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
      *op = tmp;
    }
  else
    {
      gcc_assert (SUBREG_P (*op));
      gcc_assert (GET_MODE (*op) == V2DImode);
    }
}
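/* Illustrative example (not from the original sources): a constant
   operand (const_int 5) is rewritten above as the V2DImode vector
   constant {5, 0} and loaded (from the constant pool unless it is a
   standard SSE constant) into the fresh V2DI subreg created for it.  */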
/* Convert INSN to vector mode.  */

void
dimode_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);
  rtx subreg;

  if (MEM_P (dst) && !REG_P (src))
    {
      /* There are no scalar integer instructions and therefore
	 temporary register usage is required.  */
      rtx tmp = gen_reg_rtx (DImode);
      emit_conversion_insns (gen_move_insn (dst, tmp), insn);
      dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
    }

  switch (GET_CODE (src))
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      convert_op (&XEXP (src, 0), insn);
      PUT_MODE (src, V2DImode);
      break;

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
      convert_op (&XEXP (src, 0), insn);
      convert_op (&XEXP (src, 1), insn);
      PUT_MODE (src, V2DImode);
      break;

    case NEG:
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (V2DImode);
      emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn);
      src = gen_rtx_MINUS (V2DImode, subreg, src);
      break;

    case NOT:
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (V2DImode);
      emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn);
      src = gen_rtx_XOR (V2DImode, src, subreg);
      break;

    case MEM:
      if (!REG_P (dst))
	convert_op (&src, insn);
      break;

    case REG:
      if (!MEM_P (dst))
	convert_op (&src, insn);
      break;

    case SUBREG:
      gcc_assert (GET_MODE (src) == V2DImode);
      break;

    case COMPARE:
      src = SUBREG_REG (XEXP (XEXP (src, 0), 0));

      gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
		  || (SUBREG_P (src) && GET_MODE (src) == V2DImode));

      if (REG_P (src))
	subreg = gen_rtx_SUBREG (V2DImode, src, 0);
      else
	subreg = copy_rtx_if_shared (src);
      emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg)),
			insn);
      dst = gen_rtx_REG (CCmode, FLAGS_REG);
      src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
					       copy_rtx_if_shared (src)),
			    UNSPEC_PTEST);
      break;

    case CONST_INT:
      convert_op (&src, insn);
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  recog_memoized (insn);
  df_insn_rescan (insn);
}
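/* Illustrative example (not from the original sources): after conversion

     (set (reg:DI 90) (plus:DI (reg:DI 91) (reg:DI 92)))

   becomes

     (set (subreg:V2DI (reg:DI 90) 0)
	  (plus:V2DI (subreg:V2DI (reg:DI 91) 0)
		     (subreg:V2DI (reg:DI 92) 0)))

   and is re-recognized as an SSE V2DImode addition (paddq).  */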
2241 /* Fix uses of converted REG in debug insns. */
2244 timode_scalar_chain::fix_debug_reg_uses (rtx reg)
2246 if (!flag_var_tracking)
2250 for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
2252 rtx_insn *insn = DF_REF_INSN (ref);
2253 /* Make sure the next ref is for a different instruction,
2254 so that we're not affected by the rescan. */
2255 next = DF_REF_NEXT_REG (ref);
2256 while (next && DF_REF_INSN (next) == insn)
2257 next = DF_REF_NEXT_REG (next);
2259 if (DEBUG_INSN_P (insn))
2261 /* It may be a debug insn with a TImode variable in a register. */
2263 bool changed = false;
2264 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
2266 rtx *loc = DF_REF_LOC (ref);
2267 if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
2269 *loc = gen_rtx_SUBREG (TImode, *loc, 0);
2274 df_insn_rescan (insn);
2279 /* Convert INSN from TImode to V1TImode. */
2282 timode_scalar_chain::convert_insn (rtx_insn *insn)
2284 rtx def_set = single_set (insn);
2285 rtx src = SET_SRC (def_set);
2286 rtx dst = SET_DEST (def_set);
2288 switch (GET_CODE (dst))
2292 rtx tmp = find_reg_equal_equiv_note (insn);
2294 PUT_MODE (XEXP (tmp, 0), V1TImode);
2295 PUT_MODE (dst, V1TImode);
2296 fix_debug_reg_uses (dst);
2300 PUT_MODE (dst, V1TImode);
2307 switch (GET_CODE (src))
2310 PUT_MODE (src, V1TImode);
2311 /* Call fix_debug_reg_uses only if SRC is never defined. */
2312 if (!DF_REG_DEF_CHAIN (REGNO (src)))
2313 fix_debug_reg_uses (src);
2317 PUT_MODE (src, V1TImode);
2320 case CONST_WIDE_INT:
2321 if (NONDEBUG_INSN_P (insn))
2323 /* Since there are no instructions to store a 128-bit constant,
2324 a temporary register is required. */
2325 rtx tmp = gen_reg_rtx (V1TImode);
2327 src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
2328 src = validize_mem (force_const_mem (V1TImode, src));
2329 rtx_insn *seq = get_insns ();
2332 emit_insn_before (seq, insn);
2333 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2339 switch (standard_sse_constant_p (src, TImode))
2342 src = CONST0_RTX (GET_MODE (dst));
2345 src = CONSTM1_RTX (GET_MODE (dst));
2350 if (NONDEBUG_INSN_P (insn))
2352 rtx tmp = gen_reg_rtx (V1TImode);
2353 /* Since there are no instructions to store a standard SSE
2354 constant, a temporary register is required. */
2355 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2364 SET_SRC (def_set) = src;
2365 SET_DEST (def_set) = dst;
2367 /* Drop possible dead definitions. */
2368 PATTERN (insn) = def_set;
2370 INSN_CODE (insn) = -1;
2371 recog_memoized (insn);
2372 df_insn_rescan (insn);
2376 dimode_scalar_chain::convert_registers ()
2381 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
2384 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
2385 make_vector_copies (id);
2388 /* Convert the whole chain, creating the required register
2389 conversions and copies. */
2392 scalar_chain::convert ()
2396 int converted_insns = 0;
2398 if (!dbg_cnt (stv_conversion))
2402 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
2404 convert_registers ();
2406 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
2408 convert_insn (DF_INSN_UID_GET (id)->insn);
2412 return converted_insns;
2415 /* Main STV pass function. Find and convert scalar
2416 instructions into vector mode when profitable. */
2419 convert_scalars_to_vector ()
2423 int converted_insns = 0;
2425 bitmap_obstack_initialize (NULL);
2426 candidates = BITMAP_ALLOC (NULL);
2428 calculate_dominance_info (CDI_DOMINATORS);
2429 df_set_flags (DF_DEFER_INSN_RESCAN);
2430 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2431 df_md_add_problem ();
2434 /* Find all instructions we want to convert into vector mode. */
2436 fprintf (dump_file, "Searching for mode conversion candidates...\n");
2438 FOR_EACH_BB_FN (bb, cfun)
2441 FOR_BB_INSNS (bb, insn)
2442 if (scalar_to_vector_candidate_p (insn))
2445 fprintf (dump_file, " insn %d is marked as a candidate\n",
2448 bitmap_set_bit (candidates, INSN_UID (insn));
2452 remove_non_convertible_regs (candidates);
2454 if (bitmap_empty_p (candidates))
2456 fprintf (dump_file, "There are no candidates for optimization.\n");
2458 while (!bitmap_empty_p (candidates))
2460 unsigned uid = bitmap_first_set_bit (candidates);
2461 scalar_chain *chain;
2464 chain = new timode_scalar_chain;
2466 chain = new dimode_scalar_chain;
2468 /* Find the instruction chain we want to convert to vector mode.
2469 Check all uses and definitions to estimate all required conversions. */
2471 chain->build (candidates, uid);
2473 if (chain->compute_convert_gain () > 0)
2474 converted_insns += chain->convert ();
2477 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
2484 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
2486 BITMAP_FREE (candidates);
2487 bitmap_obstack_release (NULL);
2488 df_process_deferred_rescans ();
2490 /* Conversion means we may have 128-bit register spills/fills,
2491 which require an aligned stack. */
2492 if (converted_insns)
2494 if (crtl->stack_alignment_needed < 128)
2495 crtl->stack_alignment_needed = 128;
2496 if (crtl->stack_alignment_estimated < 128)
2497 crtl->stack_alignment_estimated = 128;
2498 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
2500 for (tree parm = DECL_ARGUMENTS (current_function_decl);
2501 parm; parm = DECL_CHAIN (parm))
2503 if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
2505 if (DECL_RTL_SET_P (parm)
2506 && GET_MODE (DECL_RTL (parm)) == V1TImode)
2508 rtx r = DECL_RTL (parm);
2510 SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
2512 if (DECL_INCOMING_RTL (parm)
2513 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
2515 rtx r = DECL_INCOMING_RTL (parm);
2517 DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
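/* For illustration (a hand-written sketch, not output of this pass): on
   32-bit x86 a DImode logical operation such as

       long long a, b;
       a &= b;

   needs two GPR instructions (an "andl" on each 32-bit half), while after
   STV conversion it becomes a single V2DImode "pand" on an SSE register.
   compute_convert_gain weighs that saving against the moves required
   between the integer and SSE register files.  */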
2527 const pass_data pass_data_insert_vzeroupper =
2529 RTL_PASS, /* type */
2530 "vzeroupper", /* name */
2531 OPTGROUP_NONE, /* optinfo_flags */
2532 TV_MACH_DEP, /* tv_id */
2533 0, /* properties_required */
2534 0, /* properties_provided */
2535 0, /* properties_destroyed */
2536 0, /* todo_flags_start */
2537 TODO_df_finish, /* todo_flags_finish */
2540 class pass_insert_vzeroupper : public rtl_opt_pass
2543 pass_insert_vzeroupper(gcc::context *ctxt)
2544 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2547 /* opt_pass methods: */
2548 virtual bool gate (function *)
2551 && TARGET_VZEROUPPER && flag_expensive_optimizations
2555 virtual unsigned int execute (function *)
2557 return rest_of_handle_insert_vzeroupper ();
2560 }; // class pass_insert_vzeroupper
2562 const pass_data pass_data_stv =
2564 RTL_PASS, /* type */
2566 OPTGROUP_NONE, /* optinfo_flags */
2567 TV_MACH_DEP, /* tv_id */
2568 0, /* properties_required */
2569 0, /* properties_provided */
2570 0, /* properties_destroyed */
2571 0, /* todo_flags_start */
2572 TODO_df_finish, /* todo_flags_finish */
2575 class pass_stv : public rtl_opt_pass
2578 pass_stv (gcc::context *ctxt)
2579 : rtl_opt_pass (pass_data_stv, ctxt),
2583 /* opt_pass methods: */
2584 virtual bool gate (function *)
2586 return (timode_p == !!TARGET_64BIT
2587 && TARGET_STV && TARGET_SSE2 && optimize > 1);
2590 virtual unsigned int execute (function *)
2592 return convert_scalars_to_vector ();
2597 return new pass_stv (m_ctxt);
2600 void set_pass_param (unsigned int n, bool param)
2602 gcc_assert (n == 0);
2608 }; // class pass_stv
2613 make_pass_insert_vzeroupper (gcc::context *ctxt)
2615 return new pass_insert_vzeroupper (ctxt);
2619 make_pass_stv (gcc::context *ctxt)
2621 return new pass_stv (ctxt);
2624 /* Inserting ENDBRANCH instructions. */
2627 rest_of_insert_endbranch (void)
2629 timevar_push (TV_MACH_DEP);
2635 /* Currently emit an ENDBR if this is a tracked function, i.e. 'nocf_check'
2636 is absent from the function attributes. Later an optimization will be
2637 introduced to analyze whether the address of a static function is
2638 taken. A static function whose address is not taken will get a
2639 nocf_check attribute, which will allow reducing the number of ENDBRs. */
2641 if (!lookup_attribute ("nocf_check",
2642 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
2643 && (!flag_manual_endbr
2644 || lookup_attribute ("cf_check",
2645 DECL_ATTRIBUTES (cfun->decl)))
2646 && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
2648 /* Queue ENDBR insertion to x86_function_profiler. */
2649 if (crtl->profile && flag_fentry)
2650 cfun->machine->endbr_queued_at_entrance = true;
2653 cet_eb = gen_nop_endbr ();
2655 bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
2656 insn = BB_HEAD (bb);
2657 emit_insn_before (cet_eb, insn);
2662 FOR_EACH_BB_FN (bb, cfun)
2664 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
2665 insn = NEXT_INSN (insn))
2670 need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
2671 if (!need_endbr && !SIBLING_CALL_P (insn))
2673 rtx call = get_call_rtx_from (insn);
2674 rtx fnaddr = XEXP (call, 0);
2675 tree fndecl = NULL_TREE;
2677 /* Also generate ENDBRANCH for a non-tail call which
2678 may return via an indirect branch. */
2679 if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
2680 fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
2681 if (fndecl == NULL_TREE)
2682 fndecl = MEM_EXPR (fnaddr);
2684 && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
2685 && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
2687 if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
2689 tree fntype = TREE_TYPE (fndecl);
2690 if (lookup_attribute ("indirect_return",
2691 TYPE_ATTRIBUTES (fntype)))
2697 /* Generate ENDBRANCH after a CALL that can return more than
2698 once (setjmp-like functions). */
2700 cet_eb = gen_nop_endbr ();
2701 emit_insn_after_setloc (cet_eb, insn, INSN_LOCATION (insn));
2705 if (JUMP_P (insn) && flag_cet_switch)
2707 rtx target = JUMP_LABEL (insn);
2708 if (target == NULL_RTX || ANY_RETURN_P (target))
2711 /* Check whether the jump targets a switch table. */
2712 rtx_insn *label = as_a<rtx_insn *> (target);
2713 rtx_insn *table = next_insn (label);
2714 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
2717 /* For the indirect jump, find all the places it jumps to and insert
2718 an ENDBRANCH there. This is done under a special flag to
2719 control ENDBRANCH generation for switch statements. */
2722 basic_block dest_blk;
2724 FOR_EACH_EDGE (e, ei, bb->succs)
2729 insn = BB_HEAD (dest_blk);
2730 gcc_assert (LABEL_P (insn));
2731 cet_eb = gen_nop_endbr ();
2732 emit_insn_after (cet_eb, insn);
2737 if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn))
2739 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
2740 /* TODO: Check the /s bit also. */
2742 cet_eb = gen_nop_endbr ();
2743 emit_insn_after (cet_eb, insn);
2749 timevar_pop (TV_MACH_DEP);
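/* For illustration (hand-written, hypothetical assembly): with
   -fcf-protection=branch and -mcet-switch, a switch compiled to a jump
   table dispatches through an indirect jump, so every case label reached
   from the table starts with an ENDBR, e.g.

       .L4:
               endbr64
               ... case body ...

   matching the FOR_EACH_EDGE loop above, which emits a nop_endbr after
   the label of each destination block.  */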
2755 const pass_data pass_data_insert_endbranch =
2757 RTL_PASS, /* type. */
2759 OPTGROUP_NONE, /* optinfo_flags. */
2760 TV_MACH_DEP, /* tv_id. */
2761 0, /* properties_required. */
2762 0, /* properties_provided. */
2763 0, /* properties_destroyed. */
2764 0, /* todo_flags_start. */
2765 0, /* todo_flags_finish. */
2768 class pass_insert_endbranch : public rtl_opt_pass
2771 pass_insert_endbranch (gcc::context *ctxt)
2772 : rtl_opt_pass (pass_data_insert_endbranch, ctxt)
2775 /* opt_pass methods: */
2776 virtual bool gate (function *)
2778 return ((flag_cf_protection & CF_BRANCH));
2781 virtual unsigned int execute (function *)
2783 return rest_of_insert_endbranch ();
2786 }; // class pass_insert_endbranch
2791 make_pass_insert_endbranch (gcc::context *ctxt)
2793 return new pass_insert_endbranch (ctxt);
2796 /* Return true if a red-zone is in use. We can't use the red-zone when
2797 there are local indirect jumps, like "indirect_jump" or "tablejump",
2798 which jump to another place in the function, since "call" in the
2799 indirect thunk pushes the return address onto the stack, destroying the red-zone.
2802 TODO: If we can reserve the first 2 WORDs in the red-zone, one for PUSH
2803 and another for CALL, we can allow local indirect jumps with an indirect thunk. */
2807 ix86_using_red_zone (void)
2809 return (TARGET_RED_ZONE
2810 && !TARGET_64BIT_MS_ABI
2811 && (!cfun->machine->has_local_indirect_jump
2812 || cfun->machine->indirect_branch_type == indirect_branch_keep));
2815 /* Return a string that documents the current -m options. The caller is
2816 responsible for freeing the string. */
2819 ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
2820 int flags, int flags2,
2821 const char *arch, const char *tune,
2822 enum fpmath_unit fpmath, bool add_nl_p)
2824 struct ix86_target_opts
2826 const char *option; /* option string */
2827 HOST_WIDE_INT mask; /* isa mask options */
2830 /* This table is ordered so that options like -msse4.2 that imply other
2831 ISAs come first. The target string will be displayed in the same order. */
2832 static struct ix86_target_opts isa2_opts[] =
2834 { "-mcx16", OPTION_MASK_ISA_CX16 },
2835 { "-mvaes", OPTION_MASK_ISA_VAES },
2836 { "-mrdpid", OPTION_MASK_ISA_RDPID },
2837 { "-mpconfig", OPTION_MASK_ISA_PCONFIG },
2838 { "-mwbnoinvd", OPTION_MASK_ISA_WBNOINVD },
2839 { "-msgx", OPTION_MASK_ISA_SGX },
2840 { "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW },
2841 { "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS },
2842 { "-mhle", OPTION_MASK_ISA_HLE },
2843 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2844 { "-mclzero", OPTION_MASK_ISA_CLZERO },
2845 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
2846 { "-mmovdir64b", OPTION_MASK_ISA_MOVDIR64B },
2847 { "-mwaitpkg", OPTION_MASK_ISA_WAITPKG },
2848 { "-mcldemote", OPTION_MASK_ISA_CLDEMOTE },
2849 { "-mptwrite", OPTION_MASK_ISA_PTWRITE }
2851 static struct ix86_target_opts isa_opts[] =
2853 { "-mavx512vpopcntdq", OPTION_MASK_ISA_AVX512VPOPCNTDQ },
2854 { "-mavx512bitalg", OPTION_MASK_ISA_AVX512BITALG },
2855 { "-mvpclmulqdq", OPTION_MASK_ISA_VPCLMULQDQ },
2856 { "-mgfni", OPTION_MASK_ISA_GFNI },
2857 { "-mavx512vnni", OPTION_MASK_ISA_AVX512VNNI },
2858 { "-mavx512vbmi2", OPTION_MASK_ISA_AVX512VBMI2 },
2859 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2860 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2861 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2862 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2863 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2864 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2865 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2866 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2867 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2868 { "-mavx2", OPTION_MASK_ISA_AVX2 },
2869 { "-mfma", OPTION_MASK_ISA_FMA },
2870 { "-mxop", OPTION_MASK_ISA_XOP },
2871 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2872 { "-mf16c", OPTION_MASK_ISA_F16C },
2873 { "-mavx", OPTION_MASK_ISA_AVX },
2874 /* { "-msse4" OPTION_MASK_ISA_SSE4 }, */
2875 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2876 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2877 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2878 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2879 { "-msse3", OPTION_MASK_ISA_SSE3 },
2880 { "-maes", OPTION_MASK_ISA_AES },
2881 { "-msha", OPTION_MASK_ISA_SHA },
2882 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2883 { "-msse2", OPTION_MASK_ISA_SSE2 },
2884 { "-msse", OPTION_MASK_ISA_SSE },
2885 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2886 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2887 { "-mmmx", OPTION_MASK_ISA_MMX },
2888 { "-mrtm", OPTION_MASK_ISA_RTM },
2889 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2890 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2891 { "-madx", OPTION_MASK_ISA_ADX },
2892 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2893 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2894 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2895 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2896 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2897 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2898 { "-mabm", OPTION_MASK_ISA_ABM },
2899 { "-mbmi", OPTION_MASK_ISA_BMI },
2900 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2901 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2902 { "-mtbm", OPTION_MASK_ISA_TBM },
2903 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2904 { "-msahf", OPTION_MASK_ISA_SAHF },
2905 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2906 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2907 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2908 { "-mpku", OPTION_MASK_ISA_PKU },
2909 { "-mlwp", OPTION_MASK_ISA_LWP },
2910 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2911 { "-mclwb", OPTION_MASK_ISA_CLWB },
2912 { "-mshstk", OPTION_MASK_ISA_SHSTK },
2913 { "-mmovdiri", OPTION_MASK_ISA_MOVDIRI }
2917 static struct ix86_target_opts flag_opts[] =
2919 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2920 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2921 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2922 { "-m80387", MASK_80387 },
2923 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2924 { "-malign-double", MASK_ALIGN_DOUBLE },
2925 { "-mcld", MASK_CLD },
2926 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2927 { "-mieee-fp", MASK_IEEE_FP },
2928 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2929 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2930 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2931 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2932 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2933 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2934 { "-mno-red-zone", MASK_NO_RED_ZONE },
2935 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2936 { "-mrecip", MASK_RECIP },
2937 { "-mrtd", MASK_RTD },
2938 { "-msseregparm", MASK_SSEREGPARM },
2939 { "-mstack-arg-probe", MASK_STACK_PROBE },
2940 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2941 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2942 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2943 { "-mvzeroupper", MASK_VZEROUPPER },
2944 { "-mstv", MASK_STV },
2945 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
2946 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
2947 { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES }
2950 /* Additional flag options. */
2951 static struct ix86_target_opts flag2_opts[] =
2953 { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY }
2956 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts)
2957 + ARRAY_SIZE (flag_opts) + ARRAY_SIZE (flag2_opts) + 6][2];
2960 char isa2_other[40];
2961 char flags_other[40];
2962 char flags2_other[40];
2972 memset (opts, '\0', sizeof (opts));
2974 /* Add -march= option. */
2977 opts[num][0] = "-march=";
2978 opts[num++][1] = arch;
2981 /* Add -mtune= option. */
2984 opts[num][0] = "-mtune=";
2985 opts[num++][1] = tune;
2988 /* Add -m32/-m64/-mx32. */
2989 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2991 if ((isa & OPTION_MASK_ABI_64) != 0)
2995 isa &= ~ (OPTION_MASK_ISA_64BIT
2996 | OPTION_MASK_ABI_64
2997 | OPTION_MASK_ABI_X32);
3001 opts[num++][0] = abi;
3003 /* Pick out the options in isa2 options. */
3004 for (i = 0; i < ARRAY_SIZE (isa2_opts); i++)
3006 if ((isa2 & isa2_opts[i].mask) != 0)
3008 opts[num++][0] = isa2_opts[i].option;
3009 isa2 &= ~ isa2_opts[i].mask;
3013 if (isa2 && add_nl_p)
3015 opts[num++][0] = isa2_other;
3016 sprintf (isa2_other, "(other isa2: %#" HOST_WIDE_INT_PRINT "x)", isa2);
3019 /* Pick out the options in isa options. */
3020 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3022 if ((isa & isa_opts[i].mask) != 0)
3024 opts[num++][0] = isa_opts[i].option;
3025 isa &= ~ isa_opts[i].mask;
3029 if (isa && add_nl_p)
3031 opts[num++][0] = isa_other;
3032 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", isa);
3035 /* Add flag options. */
3036 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3038 if ((flags & flag_opts[i].mask) != 0)
3040 opts[num++][0] = flag_opts[i].option;
3041 flags &= ~ flag_opts[i].mask;
3045 if (flags && add_nl_p)
3047 opts[num++][0] = flags_other;
3048 sprintf (flags_other, "(other flags: %#x)", flags);
3051 /* Add additional flag options. */
3052 for (i = 0; i < ARRAY_SIZE (flag2_opts); i++)
3054 if ((flags2 & flag2_opts[i].mask) != 0)
3056 opts[num++][0] = flag2_opts[i].option;
3057 flags2 &= ~ flag2_opts[i].mask;
3061 if (flags2 && add_nl_p)
3063 opts[num++][0] = flags2_other;
3064 sprintf (flags2_other, "(other flags2: %#x)", flags2);
3067 /* Add -fpmath= option. */
3070 opts[num][0] = "-mfpmath=";
3071 switch ((int) fpmath)
3074 opts[num++][1] = "387";
3078 opts[num++][1] = "sse";
3081 case FPMATH_387 | FPMATH_SSE:
3082 opts[num++][1] = "sse+387";
3094 gcc_assert (num < ARRAY_SIZE (opts));
3096 /* Size the string. */
3098 sep_len = (add_nl_p) ? 3 : 1;
3099 for (i = 0; i < num; i++)
3102 for (j = 0; j < 2; j++)
3104 len += strlen (opts[i][j]);
3107 /* Build the string. */
3108 ret = ptr = (char *) xmalloc (len);
3111 for (i = 0; i < num; i++)
3115 for (j = 0; j < 2; j++)
3116 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3123 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3131 for (j = 0; j < 2; j++)
3134 memcpy (ptr, opts[i][j], len2[j]);
3136 line_len += len2[j];
3141 gcc_assert (ret + len >= ptr);
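/* A minimal standalone sketch (illustrative only, not GCC code) of the
   size-then-build idiom used above: measure the pieces, allocate once,
   then copy them out with separators.  */
#if 0
#include <stdlib.h>
#include <string.h>

static char *
join_options (const char **words, int n, char sep)
{
  size_t len = 1;			/* Room for the terminating NUL.  */
  for (int i = 0; i < n; i++)
    len += strlen (words[i]) + 1;	/* Each word plus one separator.  */

  char *ret = (char *) malloc (len);
  char *ptr = ret;
  for (int i = 0; i < n; i++)
    {
      size_t l = strlen (words[i]);
      memcpy (ptr, words[i], l);
      ptr += l;
      *ptr++ = sep;
    }
  /* Replace the trailing separator (or write the NUL when N == 0).  */
  ptr[n ? -1 : 0] = '\0';
  return ret;
}
#endif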
3146 /* Return true if profiling code should be emitted before
3147 the prologue; otherwise return false.
3148 Note: for x86 with "hotfix" (hotpatching) this is not supported. */
3150 ix86_profile_before_prologue (void)
3152 return flag_fentry != 0;
3155 /* Function that is callable from the debugger to print the current options. */
3157 void ATTRIBUTE_UNUSED
3158 ix86_debug_options (void)
3160 char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2,
3161 target_flags, ix86_target_flags,
3162 ix86_arch_string,ix86_tune_string,
3167 fprintf (stderr, "%s\n\n", opts);
3171 fputs ("<no options>\n\n", stderr);
3176 static const char *stringop_alg_names[] = {
3178 #define DEF_ALG(alg, name) #name,
3179 #include "stringop.def"
3184 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
3185 The string is of the following form (or a comma-separated list of such entries):
3187 strategy_alg:max_size:[align|noalign]
3189 where the full size range for the strategy is either [0, max_size] or
3190 [min_size, max_size], in which min_size is the max_size + 1 of the
3191 preceding range. The last size range must have max_size == -1.
3196 -mmemcpy-strategy=libcall:-1:noalign
3198 this is equivalent (for known-size memcpy) to -mstringop-strategy=libcall
3202 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
3204 This tells the compiler to use the following strategy for memset:
3205 1) when the expected size is between [1, 16], use rep_8byte strategy;
3206 2) when the size is between [17, 2048], use vector_loop;
3207 3) when the size is > 2048, use libcall. */
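/* A standalone sketch (illustrative only, not GCC code) of how one
   comma-separated element of such a strategy string splits into its
   alg/max_size/align fields, mirroring the sscanf-based parsing in
   ix86_parse_stringop_strategy_string below.  */
#if 0
#include <stdio.h>

struct strategy_triple
{
  char alg[21];
  int max;
  char align[11];
};

/* Parse one "alg:max_size:align" triple; return nonzero on success.  */
static int
parse_strategy_triple (const char *s, struct strategy_triple *t)
{
  return sscanf (s, "%20[^:]:%d:%10s", t->alg, &t->max, t->align) == 3;
}
#endif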
3209 struct stringop_size_range
3217 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
3219 const struct stringop_algs *default_algs;
3220 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
3221 char *curr_range_str, *next_range_str;
3222 const char *opt = is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=";
3226 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
3228 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
3230 curr_range_str = strategy_str;
3237 next_range_str = strchr (curr_range_str, ',');
3239 *next_range_str++ = '\0';
3241 if (sscanf (curr_range_str, "%20[^:]:%d:%10s", alg_name, &maxs,
3244 error ("wrong argument %qs to option %qs", curr_range_str, opt);
3248 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
3250 error ("size ranges of option %qs should be increasing", opt);
3254 for (i = 0; i < last_alg; i++)
3255 if (!strcmp (alg_name, stringop_alg_names[i]))
3260 error ("wrong strategy name %qs specified for option %qs",
3263 auto_vec <const char *> candidates;
3264 for (i = 0; i < last_alg; i++)
3265 if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT)
3266 candidates.safe_push (stringop_alg_names[i]);
3270 = candidates_list_and_hint (alg_name, s, candidates);
3272 inform (input_location,
3273 "valid arguments to %qs are: %s; did you mean %qs?",
3276 inform (input_location, "valid arguments to %qs are: %s",
3282 if ((stringop_alg) i == rep_prefix_8_byte
3285 /* rep; movq isn't available in 32-bit code. */
3286 error ("strategy name %qs specified for option %qs "
3287 "not supported for 32-bit code", alg_name, opt);
3291 input_ranges[n].max = maxs;
3292 input_ranges[n].alg = (stringop_alg) i;
3293 if (!strcmp (align, "align"))
3294 input_ranges[n].noalign = false;
3295 else if (!strcmp (align, "noalign"))
3296 input_ranges[n].noalign = true;
3299 error ("unknown alignment %qs specified for option %qs", align, opt);
3303 curr_range_str = next_range_str;
3305 while (curr_range_str);
3307 if (input_ranges[n - 1].max != -1)
3309 error ("the max value for the last size range should be -1"
3310 " for option %qs", opt);
3314 if (n > MAX_STRINGOP_ALGS)
3316 error ("too many size ranges specified in option %qs", opt);
3320 /* Now override the default algs array. */
3321 for (i = 0; i < n; i++)
3323 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3324 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3325 = input_ranges[i].alg;
3326 *const_cast<int *>(&default_algs->size[i].noalign)
3327 = input_ranges[i].noalign;
3332 /* Parse the -mtune-ctrl= option. When DUMP is true,
3333 print the features that are explicitly set. */
3336 parse_mtune_ctrl_str (bool dump)
3338 if (!ix86_tune_ctrl_string)
3341 char *next_feature_string = NULL;
3342 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3343 char *orig = curr_feature_string;
3349 next_feature_string = strchr (curr_feature_string, ',');
3350 if (next_feature_string)
3351 *next_feature_string++ = '\0';
3352 if (*curr_feature_string == '^')
3354 curr_feature_string++;
3357 for (i = 0; i < X86_TUNE_LAST; i++)
3359 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3361 ix86_tune_features[i] = !clear;
3363 fprintf (stderr, "Explicitly %s feature %s\n",
3364 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3368 if (i == X86_TUNE_LAST)
3369 error ("unknown parameter to option -mtune-ctrl: %s",
3370 clear ? curr_feature_string - 1 : curr_feature_string);
3371 curr_feature_string = next_feature_string;
3373 while (curr_feature_string);
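/* Usage example (the feature names here are hypothetical): a command
   line such as

       -mtune-ctrl=feature_a,^feature_b

   sets feature_a and clears feature_b in ix86_tune_features, the '^'
   prefix marking a feature to clear.  */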
3377 /* Helper function to set ix86_tune_features. IX86_TUNE is the processor type. */
3381 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3383 unsigned HOST_WIDE_INT ix86_tune_mask = HOST_WIDE_INT_1U << ix86_tune;
3386 for (i = 0; i < X86_TUNE_LAST; ++i)
3388 if (ix86_tune_no_default)
3389 ix86_tune_features[i] = 0;
3391 ix86_tune_features[i]
3392 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3397 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3398 for (i = 0; i < X86_TUNE_LAST; i++)
3399 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3400 ix86_tune_features[i] ? "on" : "off");
3403 parse_mtune_ctrl_str (dump);
3407 /* Default align_* from the processor table. */
3410 ix86_default_align (struct gcc_options *opts)
3412 /* -falign-foo without argument: supply one. */
3413 if (opts->x_flag_align_loops && !opts->x_str_align_loops)
3414 opts->x_str_align_loops = processor_cost_table[ix86_tune]->align_loop;
3415 if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
3416 opts->x_str_align_jumps = processor_cost_table[ix86_tune]->align_jump;
3417 if (opts->x_flag_align_labels && !opts->x_str_align_labels)
3418 opts->x_str_align_labels = processor_cost_table[ix86_tune]->align_label;
3419 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
3420 opts->x_str_align_functions = processor_cost_table[ix86_tune]->align_func;
3423 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3426 ix86_override_options_after_change (void)
3428 ix86_default_align (&global_options);
3433 /* Override various settings based on options. If MAIN_ARGS_P, the
3434 options are from the command line, otherwise they are from
3435 attributes. Return true if there's an error related to the -march= option. */
3439 ix86_option_override_internal (bool main_args_p,
3440 struct gcc_options *opts,
3441 struct gcc_options *opts_set)
3444 unsigned HOST_WIDE_INT ix86_arch_mask;
3445 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3447 /* -mrecip options. */
3450 const char *string; /* option name */
3451 unsigned int mask; /* mask bits to set */
3453 const recip_options[] =
3455 { "all", RECIP_MASK_ALL },
3456 { "none", RECIP_MASK_NONE },
3457 { "div", RECIP_MASK_DIV },
3458 { "sqrt", RECIP_MASK_SQRT },
3459 { "vec-div", RECIP_MASK_VEC_DIV },
3460 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3464 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3465 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3466 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3467 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3468 #ifdef TARGET_BI_ARCH
3471 #if TARGET_BI_ARCH == 1
3472 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3473 is on and OPTION_MASK_ABI_X32 is off. We turn off
3474 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3476 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3477 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3479 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3480 on and OPTION_MASK_ABI_64 is off. We turn off
3481 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3482 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3483 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3484 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3485 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3487 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3488 && TARGET_IAMCU_P (opts->x_target_flags))
3489 sorry ("Intel MCU psABI isn%'t supported in %s mode",
3490 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
3494 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3496 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3497 OPTION_MASK_ABI_64 for TARGET_X32. */
3498 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3499 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3501 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3502 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3503 | OPTION_MASK_ABI_X32
3504 | OPTION_MASK_ABI_64);
3505 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3507 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3508 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3509 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3510 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3513 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3514 SUBTARGET_OVERRIDE_OPTIONS;
3517 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3518 SUBSUBTARGET_OVERRIDE_OPTIONS;
3521 /* -fPIC is the default for x86_64. */
3522 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3523 opts->x_flag_pic = 2;
3525 /* Need to check -mtune=generic first. */
3526 if (opts->x_ix86_tune_string)
3528 /* As special support for cross compilers we read -mtune=native
3529 as -mtune=generic. With native compilers we won't see
3530 -mtune=native, as it was already changed by the driver. */
3531 if (!strcmp (opts->x_ix86_tune_string, "native"))
3533 opts->x_ix86_tune_string = "generic";
3535 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3536 warning (OPT_Wdeprecated,
3538 ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> "
3539 "or %<-mtune=generic%> instead as appropriate")
3540 : G_("%<target(\"tune=x86-64\")%> is deprecated; use "
3541 "%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>"
3542 " instead as appropriate"));
3546 if (opts->x_ix86_arch_string)
3547 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3548 if (!opts->x_ix86_tune_string)
3550 opts->x_ix86_tune_string = processor_names[TARGET_CPU_DEFAULT];
3551 ix86_tune_defaulted = 1;
3554 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3555 or defaulted. We need to use a sensible tune option. */
3556 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3558 opts->x_ix86_tune_string = "generic";
3562 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3563 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3565 /* rep; movq isn't available in 32-bit code. */
3566 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3567 opts->x_ix86_stringop_alg = no_stringop;
3570 if (!opts->x_ix86_arch_string)
3571 opts->x_ix86_arch_string
3572 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3573 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3575 ix86_arch_specified = 1;
3577 if (opts_set->x_ix86_pmode)
3579 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3580 && opts->x_ix86_pmode == PMODE_SI)
3581 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3582 && opts->x_ix86_pmode == PMODE_DI))
3583 error ("address mode %qs not supported in the %s bit mode",
3584 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3585 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3588 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3589 ? PMODE_DI : PMODE_SI;
3591 if (!opts_set->x_ix86_abi)
3592 opts->x_ix86_abi = DEFAULT_ABI;
3594 if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags))
3595 error ("-mabi=ms not supported with X32 ABI");
3596 gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI);
3598 if ((opts->x_flag_sanitize & SANITIZE_USER_ADDRESS) && opts->x_ix86_abi == MS_ABI)
3599 error ("%<-mabi=ms%> not supported with %<-fsanitize=address%>");
3600 if ((opts->x_flag_sanitize & SANITIZE_KERNEL_ADDRESS) && opts->x_ix86_abi == MS_ABI)
3601 error ("%<-mabi=ms%> not supported with %<-fsanitize=kernel-address%>");
3602 if ((opts->x_flag_sanitize & SANITIZE_THREAD) && opts->x_ix86_abi == MS_ABI)
3603 error ("%<-mabi=ms%> not supported with %<-fsanitize=thread%>");
3605 /* For targets using the MS ABI enable ms-extensions, if not
3606 explicitly turned off. For the non-MS ABI we turn this option off. */
3608 if (!opts_set->x_flag_ms_extensions)
3609 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3611 if (opts_set->x_ix86_cmodel)
3613 switch (opts->x_ix86_cmodel)
3617 if (opts->x_flag_pic)
3618 opts->x_ix86_cmodel = CM_SMALL_PIC;
3619 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3620 error ("code model %qs not supported in the %s bit mode",
3626 if (opts->x_flag_pic)
3627 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3628 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3629 error ("code model %qs not supported in the %s bit mode",
3631 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3632 error ("code model %qs not supported in x32 mode",
3638 if (opts->x_flag_pic)
3639 opts->x_ix86_cmodel = CM_LARGE_PIC;
3640 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3641 error ("code model %qs not supported in the %s bit mode",
3643 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3644 error ("code model %qs not supported in x32 mode",
3649 if (opts->x_flag_pic)
3650 error ("code model %s does not support PIC mode", "32");
3651 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3652 error ("code model %qs not supported in the %s bit mode",
3657 if (opts->x_flag_pic)
3659 error ("code model %s does not support PIC mode", "kernel");
3660 opts->x_ix86_cmodel = CM_32;
3662 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3663 error ("code model %qs not supported in the %s bit mode",
3673 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3674 use of rip-relative addressing. This eliminates fixups that
3675 would otherwise be needed if this object is to be placed in a
3676 DLL, and is essentially just as efficient as direct addressing. */
3677 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3678 && (TARGET_RDOS || TARGET_PECOFF))
3679 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3680 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3681 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3683 opts->x_ix86_cmodel = CM_32;
3685 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3687 error ("-masm=intel not supported in this configuration");
3688 opts->x_ix86_asm_dialect = ASM_ATT;
3690 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3691 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3692 sorry ("%i-bit mode not compiled in",
3693 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3695 for (i = 0; i < pta_size; i++)
3696 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3698 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3701 ? G_("%<generic%> CPU can be used only for %<-mtune=%> "
3703 : G_("%<generic%> CPU can be used only for "
3704 "%<target(\"tune=\")%> attribute"));
3707 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3710 ? G_("%<intel%> CPU can be used only for %<-mtune=%> "
3712 : G_("%<intel%> CPU can be used only for "
3713 "%<target(\"tune=\")%> attribute"));
3717 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3718 && !((processor_alias_table[i].flags & PTA_64BIT) != 0))
3720 error ("CPU you selected does not support x86-64 "
3725 ix86_schedule = processor_alias_table[i].schedule;
3726 ix86_arch = processor_alias_table[i].processor;
3727 /* Default cpu tuning to the architecture. */
3728 ix86_tune = ix86_arch;
3730 if (((processor_alias_table[i].flags & PTA_MMX) != 0)
3731 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3732 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3733 if (((processor_alias_table[i].flags & PTA_3DNOW) != 0)
3734 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3735 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3736 if (((processor_alias_table[i].flags & PTA_3DNOW_A) != 0)
3737 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3738 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3739 if (((processor_alias_table[i].flags & PTA_SSE) != 0)
3740 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3741 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3742 if (((processor_alias_table[i].flags & PTA_SSE2) != 0)
3743 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3744 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3745 if (((processor_alias_table[i].flags & PTA_SSE3) != 0)
3746 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3747 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3748 if (((processor_alias_table[i].flags & PTA_SSSE3) != 0)
3749 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3750 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3751 if (((processor_alias_table[i].flags & PTA_SSE4_1) != 0)
3752 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3753 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3754 if (((processor_alias_table[i].flags & PTA_SSE4_2) != 0)
3755 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3756 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3757 if (((processor_alias_table[i].flags & PTA_AVX) != 0)
3758 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3759 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3760 if (((processor_alias_table[i].flags & PTA_AVX2) != 0)
3761 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3762 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3763 if (((processor_alias_table[i].flags & PTA_FMA) != 0)
3764 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3765 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3766 if (((processor_alias_table[i].flags & PTA_SSE4A) != 0)
3767 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3768 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3769 if (((processor_alias_table[i].flags & PTA_FMA4) != 0)
3770 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3771 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3772 if (((processor_alias_table[i].flags & PTA_XOP) != 0)
3773 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3774 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3775 if (((processor_alias_table[i].flags & PTA_LWP) != 0)
3776 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3777 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3778 if (((processor_alias_table[i].flags & PTA_ABM) != 0)
3779 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3780 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3781 if (((processor_alias_table[i].flags & PTA_BMI) != 0)
3782 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3783 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3784 if (((processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)) != 0)
3785 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3786 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3787 if (((processor_alias_table[i].flags & PTA_TBM) != 0)
3788 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3789 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3790 if (((processor_alias_table[i].flags & PTA_BMI2) != 0)
3791 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3792 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3793 if (((processor_alias_table[i].flags & PTA_CX16) != 0)
3794 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CX16))
3795 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16;
3796 if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 0)
3797 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3798 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3799 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3800 && ((processor_alias_table[i].flags & PTA_NO_SAHF) != 0))
3801 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3802 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3803 if (((processor_alias_table[i].flags & PTA_MOVBE) != 0)
3804 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MOVBE))
3805 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MOVBE;
3806 if (((processor_alias_table[i].flags & PTA_AES) != 0)
3807 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3808 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3809 if (((processor_alias_table[i].flags & PTA_SHA) != 0)
3810 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3811 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3812 if (((processor_alias_table[i].flags & PTA_PCLMUL) != 0)
3813 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3814 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3815 if (((processor_alias_table[i].flags & PTA_FSGSBASE) != 0)
3816 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3817 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3818 if (((processor_alias_table[i].flags & PTA_RDRND) != 0)
3819 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3820 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3821 if (((processor_alias_table[i].flags & PTA_F16C) != 0)
3822 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3823 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3824 if (((processor_alias_table[i].flags & PTA_RTM) != 0)
3825 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3826 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3827 if (((processor_alias_table[i].flags & PTA_HLE) != 0)
3828 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_HLE))
3829 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_HLE;
3830 if (((processor_alias_table[i].flags & PTA_PRFCHW) != 0)
3831 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3832 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3833 if (((processor_alias_table[i].flags & PTA_RDSEED) != 0)
3834 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3835 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3836 if (((processor_alias_table[i].flags & PTA_ADX) != 0)
3837 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3838 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3839 if (((processor_alias_table[i].flags & PTA_FXSR) != 0)
3840 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3841 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3842 if (((processor_alias_table[i].flags & PTA_XSAVE) != 0)
3843 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3844 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3845 if (((processor_alias_table[i].flags & PTA_XSAVEOPT) != 0)
3846 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3847 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3848 if (((processor_alias_table[i].flags & PTA_AVX512F) != 0)
3849 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3850 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3851 if (((processor_alias_table[i].flags & PTA_AVX512ER) != 0)
3852 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3853 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3854 if (((processor_alias_table[i].flags & PTA_AVX512PF) != 0)
3855 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3856 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3857 if (((processor_alias_table[i].flags & PTA_AVX512CD) != 0)
3858 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3859 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3860 if (((processor_alias_table[i].flags & PTA_PREFETCHWT1) != 0)
3861 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3862 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3863 if (((processor_alias_table[i].flags & PTA_CLWB) != 0)
3864 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3865 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3866 if (((processor_alias_table[i].flags & PTA_CLFLUSHOPT) != 0)
3867 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3868 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3869 if (((processor_alias_table[i].flags & PTA_CLZERO) != 0)
3870 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CLZERO))
3871 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CLZERO;
3872 if (((processor_alias_table[i].flags & PTA_XSAVEC) != 0)
3873 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3874 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3875 if (((processor_alias_table[i].flags & PTA_XSAVES) != 0)
3876 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3877 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3878 if (((processor_alias_table[i].flags & PTA_AVX512DQ) != 0)
3879 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3880 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3881 if (((processor_alias_table[i].flags & PTA_AVX512BW) != 0)
3882 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3883 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3884 if (((processor_alias_table[i].flags & PTA_AVX512VL) != 0)
3885 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3886 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3887 if (((processor_alias_table[i].flags & PTA_AVX512VBMI) != 0)
3888 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3889 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3890 if (((processor_alias_table[i].flags & PTA_AVX512IFMA) != 0)
3891 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3892 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3893 if (((processor_alias_table[i].flags & PTA_AVX512VNNI) != 0)
3894 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VNNI))
3895 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VNNI;
3896 if (((processor_alias_table[i].flags & PTA_GFNI) != 0)
3897 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_GFNI))
3898 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_GFNI;
3899 if (((processor_alias_table[i].flags & PTA_AVX512VBMI2) != 0)
3900 && !(opts->x_ix86_isa_flags_explicit
3901 & OPTION_MASK_ISA_AVX512VBMI2))
3902 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI2;
3903 if (((processor_alias_table[i].flags & PTA_VPCLMULQDQ) != 0)
3904 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_VPCLMULQDQ))
3905 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_VPCLMULQDQ;
3906 if (((processor_alias_table[i].flags & PTA_AVX512BITALG) != 0)
3907 && !(opts->x_ix86_isa_flags_explicit
3908 & OPTION_MASK_ISA_AVX512BITALG))
3909 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BITALG;
3911 if (((processor_alias_table[i].flags & PTA_AVX5124VNNIW) != 0)
3912 && !(opts->x_ix86_isa_flags2_explicit
3913 & OPTION_MASK_ISA_AVX5124VNNIW))
3914 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW;
3915 if (((processor_alias_table[i].flags & PTA_AVX5124FMAPS) != 0)
3916 && !(opts->x_ix86_isa_flags2_explicit
3917 & OPTION_MASK_ISA_AVX5124FMAPS))
3918 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS;
3919 if (((processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ) != 0)
3920 && !(opts->x_ix86_isa_flags_explicit
3921 & OPTION_MASK_ISA_AVX512VPOPCNTDQ))
3922 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VPOPCNTDQ;
3923 if (((processor_alias_table[i].flags & PTA_SGX) != 0)
3924 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX))
3925 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX;
3926 if (((processor_alias_table[i].flags & PTA_VAES) != 0)
3927 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_VAES))
3928 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_VAES;
3929 if (((processor_alias_table[i].flags & PTA_RDPID) != 0)
3930 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_RDPID))
3931 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_RDPID;
3932 if (((processor_alias_table[i].flags & PTA_PCONFIG) != 0)
3933 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PCONFIG))
3934 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PCONFIG;
3935 if (((processor_alias_table[i].flags & PTA_WBNOINVD) != 0)
3936 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_WBNOINVD))
3937 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_WBNOINVD;
3938 if (((processor_alias_table[i].flags & PTA_PTWRITE) != 0)
3939 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PTWRITE))
3940 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PTWRITE;
3942 if ((processor_alias_table[i].flags
3943 & (PTA_PREFETCH_SSE | PTA_SSE)) != 0)
3944 x86_prefetch_sse = true;
3945 if (((processor_alias_table[i].flags & PTA_MWAITX) != 0)
3946 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MWAITX))
3947 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MWAITX;
3948 if (((processor_alias_table[i].flags & PTA_PKU) != 0)
3949 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
3950 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
3952 /* Don't enable x87 instructions if only
3953 general registers are allowed. */
3954 if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY)
3955 && !(opts_set->x_target_flags & MASK_80387))
3957 if (((processor_alias_table[i].flags & PTA_NO_80387) != 0))
3958 opts->x_target_flags &= ~MASK_80387;
3960 opts->x_target_flags |= MASK_80387;
3968 ? G_("bad value (%qs) for %<-march=%> switch")
3969 : G_("bad value (%qs) for %<target(\"arch=\")%> attribute"),
3970 opts->x_ix86_arch_string);
3972 auto_vec <const char *> candidates;
3973 for (i = 0; i < pta_size; i++)
3974 if (strcmp (processor_alias_table[i].name, "generic")
3975 && strcmp (processor_alias_table[i].name, "intel")
3976 && (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3977 || ((processor_alias_table[i].flags & PTA_64BIT) != 0)))
3978 candidates.safe_push (processor_alias_table[i].name);
3980 #ifdef HAVE_LOCAL_CPU_DETECT
3981 /* Also add "native" as a possible value. */
3982 candidates.safe_push ("native");
3987 = candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates);
3989 inform (input_location,
3991 ? G_("valid arguments to %<-march=%> switch are: "
3992 "%s; did you mean %qs?")
3993 : G_("valid arguments to %<target(\"arch=\")%> attribute are: "
3994 "%s; did you mean %qs?"), s, hint);
3996 inform (input_location,
3998 ? G_("valid arguments to %<-march=%> switch are: %s")
3999 : G_("valid arguments to %<target(\"arch=\")%> attribute "
4004 ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch;
4005 for (i = 0; i < X86_ARCH_LAST; ++i)
4006 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
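/* Illustrative sketch (a hypothetical refactoring, not GCC code): most
   of the PTA_* -> OPTION_MASK_ISA_* propagation above follows a single
   pattern and could be driven by a table, e.g.

       static const struct { unsigned HOST_WIDE_INT pta; HOST_WIDE_INT isa; }
	 pta_isa_map[] = {
	   { PTA_MMX, OPTION_MASK_ISA_MMX },
	   { PTA_SSE, OPTION_MASK_ISA_SSE },
	   (one entry per simple PTA/ISA pair)
	 };
       for (size_t j = 0; j < ARRAY_SIZE (pta_isa_map); j++)
	 if ((processor_alias_table[i].flags & pta_isa_map[j].pta) != 0
	     && !(opts->x_ix86_isa_flags_explicit & pta_isa_map[j].isa))
	   opts->x_ix86_isa_flags |= pta_isa_map[j].isa;

   The if-chain form additionally accommodates entries that target
   x_ix86_isa_flags2 or have compound conditions such as PTA_NO_SAHF
   and PTA_LZCNT | PTA_ABM.  */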
4008 for (i = 0; i < pta_size; i++)
4009 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4011 ix86_schedule = processor_alias_table[i].schedule;
4012 ix86_tune = processor_alias_table[i].processor;
4013 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4015 if (!((processor_alias_table[i].flags & PTA_64BIT) != 0))
4017 if (ix86_tune_defaulted)
4019 opts->x_ix86_tune_string = "x86-64";
4020 for (i = 0; i < pta_size; i++)
4021 if (! strcmp (opts->x_ix86_tune_string,
4022 processor_alias_table[i].name))
4024 ix86_schedule = processor_alias_table[i].schedule;
4025 ix86_tune = processor_alias_table[i].processor;
4028 error ("CPU you selected does not support x86-64 "
4032 /* Intel CPUs have always interpreted SSE prefetch instructions as
4033 NOPs, so we can enable SSE prefetch instructions even when
4034 -mtune (rather than -march) points us to a processor that has them.
4035 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
4036 higher processors. */
4038 && ((processor_alias_table[i].flags
4039 & (PTA_PREFETCH_SSE | PTA_SSE)) != 0))
4040 x86_prefetch_sse = true;
4044 if (ix86_tune_specified && i == pta_size)
4047 ? G_("bad value (%qs) for %<-mtune=%> switch")
4048 : G_("bad value (%qs) for %<target(\"tune=\")%> attribute"),
4049 opts->x_ix86_tune_string);
4051 auto_vec <const char *> candidates;
4052 for (i = 0; i < pta_size; i++)
4053 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4054 || ((processor_alias_table[i].flags & PTA_64BIT) != 0))
4055 candidates.safe_push (processor_alias_table[i].name);
4057 #ifdef HAVE_LOCAL_CPU_DETECT
4058 /* Also add "native" as a possible value. */
4059 candidates.safe_push ("native");
4064 = candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates);
4066 inform (input_location,
4068 ? G_("valid arguments to %<-mtune=%> switch are: "
4069 "%s; did you mean %qs?")
4070 : G_("valid arguments to %<target(\"tune=\")%> attribute are: "
4071 "%s; did you mean %qs?"), s, hint);
4073 inform (input_location,
4075 ? G_("valid arguments to %<-mtune=%> switch are: %s")
4076 : G_("valid arguments to %<target(\"tune=\")%> attribute "
4081 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
4083 #ifndef USE_IX86_FRAME_POINTER
4084 #define USE_IX86_FRAME_POINTER 0
4087 #ifndef USE_X86_64_FRAME_POINTER
4088 #define USE_X86_64_FRAME_POINTER 0
4091 /* Set the default values for switches whose default depends on TARGET_64BIT
4092 in case they weren't overridden by command-line options. */
4093 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4095 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4096 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
4097 if (opts->x_flag_asynchronous_unwind_tables
4098 && !opts_set->x_flag_unwind_tables
4099 && TARGET_64BIT_MS_ABI)
4100 opts->x_flag_unwind_tables = 1;
4101 if (opts->x_flag_asynchronous_unwind_tables == 2)
4102 opts->x_flag_unwind_tables
4103 = opts->x_flag_asynchronous_unwind_tables = 1;
4104 if (opts->x_flag_pcc_struct_return == 2)
4105 opts->x_flag_pcc_struct_return = 0;
4109 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4110 opts->x_flag_omit_frame_pointer
4111 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
4112 if (opts->x_flag_asynchronous_unwind_tables == 2)
4113 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
4114 if (opts->x_flag_pcc_struct_return == 2)
4116 /* Intel MCU psABI specifies that -freg-struct-return should
4117 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
4118 we check -miamcu so that -freg-struct-return is always
4119 turned on if -miamcu is used. */
4120 if (TARGET_IAMCU_P (opts->x_target_flags))
4121 opts->x_flag_pcc_struct_return = 0;
4123 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
4127 ix86_tune_cost = processor_cost_table[ix86_tune];
4128 /* TODO: ix86_cost should be chosen at instruction or function granularity,
4129 so that for cold code we use size_cost even in !optimize_size compilation. */
4130 if (opts->x_optimize_size)
4131 ix86_cost = &ix86_size_cost;
4133 ix86_cost = ix86_tune_cost;
4135 /* Arrange to set up i386_stack_locals for all functions. */
4136 init_machine_status = ix86_init_machine_status;
4138 /* Validate -mregparm= value. */
4139 if (opts_set->x_ix86_regparm)
4141 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4142 warning (0, "-mregparm is ignored in 64-bit mode");
4143 else if (TARGET_IAMCU_P (opts->x_target_flags))
4144 warning (0, "-mregparm is ignored for Intel MCU psABI");
4145 if (opts->x_ix86_regparm > REGPARM_MAX)
4147 error ("-mregparm=%d is not between 0 and %d",
4148 opts->x_ix86_regparm, REGPARM_MAX);
4149 opts->x_ix86_regparm = 0;
4152 if (TARGET_IAMCU_P (opts->x_target_flags)
4153 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
4154 opts->x_ix86_regparm = REGPARM_MAX;
4156 /* Default align_* from the processor table. */
4157 ix86_default_align (opts);
4159 /* Provide default for -mbranch-cost= value. */
4160 if (!opts_set->x_ix86_branch_cost)
4161 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
4163 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4165 opts->x_target_flags
4166 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
4168 /* Enable by default the SSE and MMX builtins. Do allow the user to
4169 explicitly disable any of these. In particular, disabling SSE and
4170 MMX for kernel code is extremely useful. */
4171 if (!ix86_arch_specified)
4172 opts->x_ix86_isa_flags
4173 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
4174 | TARGET_SUBTARGET64_ISA_DEFAULT)
4175 & ~opts->x_ix86_isa_flags_explicit);
4177 if (TARGET_RTD_P (opts->x_target_flags))
? G_("%<-mrtd%> is ignored in 64-bit mode")
: G_("%<target(\"rtd\")%> is ignored in 64-bit mode"));
4185 opts->x_target_flags
4186 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
4188 if (!ix86_arch_specified)
4189 opts->x_ix86_isa_flags
4190 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
/* The i386 ABI does not specify a red zone.  It still makes sense to use
   one when the programmer takes care to keep the stack from being
   clobbered.  */
4194 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
4195 opts->x_target_flags |= MASK_NO_RED_ZONE;
4198 /* Keep nonleaf frame pointers. */
4199 if (opts->x_flag_omit_frame_pointer)
4200 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4201 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
4202 opts->x_flag_omit_frame_pointer = 1;
4204 /* If we're doing fast math, we don't care about comparison order
4205 wrt NaNs. This lets us use a shorter comparison sequence. */
4206 if (opts->x_flag_finite_math_only)
4207 opts->x_target_flags &= ~MASK_IEEE_FP;
4209 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4210 since the insns won't need emulation. */
4211 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
4212 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4214 /* Likewise, if the target doesn't have a 387, or we've specified
4215 software floating point, don't use 387 inline intrinsics. */
4216 if (!TARGET_80387_P (opts->x_target_flags))
4217 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4219 /* Turn on MMX builtins for -msse. */
4220 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4221 opts->x_ix86_isa_flags
4222 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4224 /* Enable SSE prefetch. */
4225 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4226 || (TARGET_PRFCHW_P (opts->x_ix86_isa_flags)
4227 && !TARGET_3DNOW_P (opts->x_ix86_isa_flags))
4228 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4229 x86_prefetch_sse = true;
4231 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4232 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4233 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4234 opts->x_ix86_isa_flags
4235 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4237 /* Enable lzcnt instruction for -mabm. */
if (TARGET_ABM_P (opts->x_ix86_isa_flags))
4239 opts->x_ix86_isa_flags
4240 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
/* Disable BMI, BMI2 and TBM instructions for -m16.  */
if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
  opts->x_ix86_isa_flags
    &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM)
	 & ~opts->x_ix86_isa_flags_explicit);
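/* Illustrative sketch (not part of GCC): the "mask & ~explicit" idiom
   used throughout this function turns implied features on or off while
   leaving anything the user set explicitly alone.  A hypothetical
   standalone version of the enabling direction:  */
#if 0
static unsigned int
enable_implied_bits (unsigned int isa, unsigned int implied,
                     unsigned int user_explicit)
{
  /* Only touch bits the user has not explicitly enabled or disabled.  */
  return isa | (implied & ~user_explicit);
}
#endif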
4248 /* Validate -mpreferred-stack-boundary= value or default it to
4249 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4250 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4251 if (opts_set->x_ix86_preferred_stack_boundary_arg)
int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
int max = TARGET_SEH ? 4 : 12;

if (opts->x_ix86_preferred_stack_boundary_arg < min
    || opts->x_ix86_preferred_stack_boundary_arg > max)
  {
    if (TARGET_SEH)
      error ("-mpreferred-stack-boundary is not supported "
	     "for this target");
    else
      error ("-mpreferred-stack-boundary=%d is not between %d and %d",
	     opts->x_ix86_preferred_stack_boundary_arg, min, max);
  }
4267 ix86_preferred_stack_boundary
4268 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
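/* Illustrative sketch (not part of GCC): the option argument is the
   base-2 log of the boundary in bytes, so e.g. an argument of 4 yields
   (1 << 4) * 8 = 128 bits.  A hypothetical helper showing the mapping:  */
#if 0
static unsigned int
boundary_arg_to_bits (int arg)
{
  return (1u << arg) * 8;	/* 8 == BITS_PER_UNIT on x86.  */
}
#endif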
4271 /* Set the default value for -mstackrealign. */
4272 if (!opts_set->x_ix86_force_align_arg_pointer)
4273 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4275 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4277 /* Validate -mincoming-stack-boundary= value or default it to
4278 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4279 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4280 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4282 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
4284 if (opts->x_ix86_incoming_stack_boundary_arg < min
4285 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4286 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4287 opts->x_ix86_incoming_stack_boundary_arg, min);
4290 ix86_user_incoming_stack_boundary
4291 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4292 ix86_incoming_stack_boundary
4293 = ix86_user_incoming_stack_boundary;
#ifndef NO_PROFILE_COUNTERS
  if (flag_nop_mcount)
    error ("-mnop-mcount is not compatible with this target");
#endif
4301 if (flag_nop_mcount && flag_pic)
4302 error ("-mnop-mcount is not implemented for -fPIC");
4304 /* Accept -msseregparm only if at least SSE support is enabled. */
4305 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4306 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4308 ? G_("%<-msseregparm%> used without SSE enabled")
4309 : G_("%<target(\"sseregparm\")%> used without SSE enabled"));
4311 if (opts_set->x_ix86_fpmath)
4313 if (opts->x_ix86_fpmath & FPMATH_SSE)
4315 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4317 if (TARGET_80387_P (opts->x_target_flags))
4319 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4320 opts->x_ix86_fpmath = FPMATH_387;
4323 else if ((opts->x_ix86_fpmath & FPMATH_387)
4324 && !TARGET_80387_P (opts->x_target_flags))
4326 warning (0, "387 instruction set disabled, using SSE arithmetics");
4327 opts->x_ix86_fpmath = FPMATH_SSE;
/* For all chips supporting SSE2, -mfpmath=sse performs better than
   fpmath=387.  The latter is, however, the default on many targets,
   since the extra 80-bit precision of temporaries is considered to be
   part of the ABI.  Overwrite the default at least for -ffast-math.
   TODO: -mfpmath=both seems to produce equally performing code with
   slightly smaller binaries.  It is, however, not clear whether register
   allocation is ready for this setting.
   Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
   codegen.  We may switch to 387 with -ffast-math for size-optimized
   functions.  */
4341 else if (fast_math_flags_set_p (&global_options)
4342 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4343 opts->x_ix86_fpmath = FPMATH_SSE;
4345 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4347 /* Use external vectorized library in vectorizing intrinsics. */
4348 if (opts_set->x_ix86_veclibabi_type)
4349 switch (opts->x_ix86_veclibabi_type)
4351 case ix86_veclibabi_type_svml:
4352 ix86_veclib_handler = ix86_veclibabi_svml;
4355 case ix86_veclibabi_type_acml:
4356 ix86_veclib_handler = ix86_veclibabi_acml;
4363 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4364 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4365 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4367 /* If stack probes are required, the space used for large function
4368 arguments on the stack must also be probed, so enable
4369 -maccumulate-outgoing-args so this happens in the prologue. */
4370 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4371 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4373 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4376 ? G_("stack probing requires %<-maccumulate-outgoing-args%> "
4378 : G_("stack probing requires "
4379 "%<target(\"accumulate-outgoing-args\")%> for "
4381 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4384 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
4385 so enable -maccumulate-outgoing-args when %ebp is fixed. */
4386 if (fixed_regs[BP_REG]
4387 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4389 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4392 ? G_("fixed ebp register requires "
4393 "%<-maccumulate-outgoing-args%>")
4394 : G_("fixed ebp register requires "
4395 "%<target(\"accumulate-outgoing-args\")%>"));
4396 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4399 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4402 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4403 p = strchr (internal_label_prefix, 'X');
4404 internal_label_prefix_len = p - internal_label_prefix;
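/* Illustrative sketch (not part of GCC): the trick above formats a dummy
   label containing the marker character 'X' and then measures how many
   prefix characters the macro emitted before it.  A hypothetical
   equivalent using snprintf in place of ASM_GENERATE_INTERNAL_LABEL:  */
#if 0
#include <stdio.h>
#include <string.h>

static size_t
label_prefix_len (char *buf, size_t bufsz)
{
  /* Assume the assembler prefixes internal labels with ".L".  */
  snprintf (buf, bufsz, "%sX%u", ".L", 0u);	/* e.g. ".LX0" */
  return (size_t) (strchr (buf, 'X') - buf);	/* 2 for ".L" */
}
#endif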
/* When a scheduling description is not available, disable the scheduler
   pass so it won't slow down the compilation and make x87 code slower.  */
4410 if (!TARGET_SCHEDULE)
4411 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4413 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4414 ix86_tune_cost->simultaneous_prefetches,
4415 opts->x_param_values,
4416 opts_set->x_param_values);
4417 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4418 ix86_tune_cost->prefetch_block,
4419 opts->x_param_values,
4420 opts_set->x_param_values);
4421 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4422 ix86_tune_cost->l1_cache_size,
4423 opts->x_param_values,
4424 opts_set->x_param_values);
4425 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4426 ix86_tune_cost->l2_cache_size,
4427 opts->x_param_values,
4428 opts_set->x_param_values);
/* Enable software prefetching at -O3 for CPUs where prefetching is
   helpful.  */
4431 if (opts->x_flag_prefetch_loop_arrays < 0
4433 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4434 && !opts->x_optimize_size
4435 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4436 opts->x_flag_prefetch_loop_arrays = 1;
/* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
   can be optimized to ap = __builtin_next_arg (0).  */
4440 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4441 targetm.expand_builtin_va_start = NULL;
4443 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4445 ix86_gen_leave = gen_leave_rex64;
4446 if (Pmode == DImode)
4448 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4449 ix86_gen_tls_local_dynamic_base_64
4450 = gen_tls_local_dynamic_base_64_di;
4454 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4455 ix86_gen_tls_local_dynamic_base_64
4456 = gen_tls_local_dynamic_base_64_si;
4460 ix86_gen_leave = gen_leave;
4462 if (Pmode == DImode)
4464 ix86_gen_add3 = gen_adddi3;
4465 ix86_gen_sub3 = gen_subdi3;
4466 ix86_gen_sub3_carry = gen_subdi3_carry;
4467 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4468 ix86_gen_andsp = gen_anddi3;
4469 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4470 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4471 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4472 ix86_gen_monitor = gen_sse3_monitor_di;
4473 ix86_gen_monitorx = gen_monitorx_di;
4474 ix86_gen_clzero = gen_clzero_di;
4478 ix86_gen_add3 = gen_addsi3;
4479 ix86_gen_sub3 = gen_subsi3;
4480 ix86_gen_sub3_carry = gen_subsi3_carry;
4481 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4482 ix86_gen_andsp = gen_andsi3;
4483 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4484 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4485 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4486 ix86_gen_monitor = gen_sse3_monitor_si;
4487 ix86_gen_monitorx = gen_monitorx_si;
4488 ix86_gen_clzero = gen_clzero_si;
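/* Illustrative sketch (not part of GCC): the assignments above bind
   mode-specific generator functions once, at option-override time, so
   the rest of the back end can call through a single pointer instead of
   testing Pmode at every use.  A hypothetical miniature of the pattern:  */
#if 0
typedef long (*gen_fn) (long, long);

static long gen_add_di (long a, long b) { return a + b; }	  /* 64-bit */
static long gen_add_si (long a, long b) { return (int) (a + b); } /* 32-bit */

static gen_fn example_gen_add;

static void
bind_generators (int pmode_is_dimode)
{
  example_gen_add = pmode_is_dimode ? gen_add_di : gen_add_si;
}
#endif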
4492 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4493 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4494 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4497 /* Set the default value for -mfentry. */
4498 if (!opts_set->x_flag_fentry)
4499 opts->x_flag_fentry = TARGET_SEH;
4502 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic
4503 && opts->x_flag_fentry)
sorry ("-mfentry isn%'t supported for 32-bit in combination "
       "with -fpic");
4506 else if (TARGET_SEH && !opts->x_flag_fentry)
4507 sorry ("-mno-fentry isn%'t compatible with SEH");
4510 if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
4511 sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
4513 if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
4514 && TARGET_EMIT_VZEROUPPER)
4515 opts->x_target_flags |= MASK_VZEROUPPER;
4516 if (!(opts_set->x_target_flags & MASK_STV))
4517 opts->x_target_flags |= MASK_STV;
/* Disable STV if -mpreferred-stack-boundary={2,3} or
   -mincoming-stack-boundary={2,3} or -mstackrealign is given - the
   needed stack realignment will be an extra cost the pass doesn't take
   into account, and the pass can't realign the stack.  */
4522 if (ix86_preferred_stack_boundary < 128
4523 || ix86_incoming_stack_boundary < 128
4524 || opts->x_ix86_force_align_arg_pointer)
4525 opts->x_target_flags &= ~MASK_STV;
4526 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4527 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4528 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4529 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4530 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4531 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4533 /* Enable 128-bit AVX instruction generation
4534 for the auto-vectorizer. */
4535 if (TARGET_AVX128_OPTIMAL
4536 && (opts_set->x_prefer_vector_width_type == PVW_NONE))
4537 opts->x_prefer_vector_width_type = PVW_AVX128;
4539 /* Use 256-bit AVX instruction generation
4540 in the auto-vectorizer. */
4541 if (ix86_tune_features[X86_TUNE_AVX256_OPTIMAL]
4542 && (opts_set->x_prefer_vector_width_type == PVW_NONE))
4543 opts->x_prefer_vector_width_type = PVW_AVX256;
4545 if (opts->x_ix86_recip_name)
4547 char *p = ASTRDUP (opts->x_ix86_recip_name);
4549 unsigned int mask, i;
4552 while ((q = strtok (p, ",")) != NULL)
4563 if (!strcmp (q, "default"))
4564 mask = RECIP_MASK_ALL;
4567 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4568 if (!strcmp (q, recip_options[i].string))
4570 mask = recip_options[i].mask;
4574 if (i == ARRAY_SIZE (recip_options))
4576 error ("unknown option for -mrecip=%s", q);
4578 mask = RECIP_MASK_NONE;
4582 opts->x_recip_mask_explicit |= mask;
4584 opts->x_recip_mask &= ~mask;
4586 opts->x_recip_mask |= mask;
4590 if (TARGET_RECIP_P (opts->x_target_flags))
4591 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4592 else if (opts_set->x_target_flags & MASK_RECIP)
4593 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
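/* Illustrative sketch (not part of GCC): parsing a comma-separated
   "opt,no-opt" list into enable/disable masks, in the spirit of the
   -mrecip= loop above.  The table contents and names here are
   hypothetical:  */
#if 0
#include <string.h>

struct recip_opt { const char *name; unsigned int mask; };
static const struct recip_opt opts_tbl[] = {
  { "div", 1u << 0 }, { "sqrt", 1u << 1 }, { "vec-div", 1u << 2 },
};

static void
parse_recip_list (char *list, unsigned int *on, unsigned int *off)
{
  for (char *q = strtok (list, ","); q; q = strtok (NULL, ","))
    {
      unsigned int *dst = on;
      if (strncmp (q, "no-", 3) == 0)
	{
	  q += 3;
	  dst = off;
	}
      for (size_t i = 0; i < sizeof opts_tbl / sizeof *opts_tbl; i++)
	if (strcmp (q, opts_tbl[i].name) == 0)
	  *dst |= opts_tbl[i].mask;
    }
}
#endif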
/* Default long double to 64-bit for 32-bit Bionic and to __float128
   for 64-bit Bionic.  Also default long double to 64-bit for the
   Intel MCU psABI.  */
4598 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
4599 && !(opts_set->x_target_flags
4600 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4601 opts->x_target_flags |= (TARGET_64BIT
4602 ? MASK_LONG_DOUBLE_128
4603 : MASK_LONG_DOUBLE_64);
4605 /* Only one of them can be active. */
4606 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4607 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
/* Handle the stack protector.  */
if (!opts_set->x_ix86_stack_protector_guard)
  {
#ifdef TARGET_THREAD_SSP_OFFSET
    if (!TARGET_HAS_BIONIC)
      opts->x_ix86_stack_protector_guard = SSP_TLS;
    else
#endif
      opts->x_ix86_stack_protector_guard = SSP_GLOBAL;
  }
4620 if (opts_set->x_ix86_stack_protector_guard_offset_str)
4623 const char *str = opts->x_ix86_stack_protector_guard_offset_str;
#if defined(INT64_T_IS_LONG)
      offset = strtol (str, &endp, 0);
#else
      offset = strtoll (str, &endp, 0);
#endif
4634 if (!*str || *endp || errno)
4635 error ("%qs is not a valid number "
4636 "in -mstack-protector-guard-offset=", str);
4638 if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000),
4639 HOST_WIDE_INT_C (0x7fffffff)))
4640 error ("%qs is not a valid offset "
4641 "in -mstack-protector-guard-offset=", str);
4643 opts->x_ix86_stack_protector_guard_offset = offset;
#ifdef TARGET_THREAD_SSP_OFFSET
  else
    opts->x_ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
#endif
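/* Illustrative sketch (not part of GCC): robust parsing of a numeric
   option, checking all three failure channels (empty string, trailing
   junk, out-of-range via errno), as the offset code above does:  */
#if 0
#include <errno.h>
#include <stdlib.h>

static int
parse_offset (const char *str, long long *out)
{
  char *endp;
  errno = 0;
  long long v = strtoll (str, &endp, 0);
  if (!*str || *endp || errno)
    return 0;			/* Not a valid number.  */
  *out = v;
  return 1;
}
#endif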
4650 if (opts_set->x_ix86_stack_protector_guard_reg_str)
4652 const char *str = opts->x_ix86_stack_protector_guard_reg_str;
4653 addr_space_t seg = ADDR_SPACE_GENERIC;
/* Discard the optional register prefix.  */
if (str[0] == '%')
  str++;
if (strlen (str) == 2 && str[1] == 's')
  {
    if (str[0] == 'f')
      seg = ADDR_SPACE_SEG_FS;
    else if (str[0] == 'g')
      seg = ADDR_SPACE_SEG_GS;
  }
4667 if (seg == ADDR_SPACE_GENERIC)
4668 error ("%qs is not a valid base register "
4669 "in -mstack-protector-guard-reg=",
4670 opts->x_ix86_stack_protector_guard_reg_str);
4672 opts->x_ix86_stack_protector_guard_reg = seg;
4676 opts->x_ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG;
4678 /* The kernel uses a different segment register for performance
4679 reasons; a system call would not have to trash the userspace
4680 segment register, which would be expensive. */
4681 if (opts->x_ix86_cmodel == CM_KERNEL)
4682 opts->x_ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS;
4685 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4686 if (opts->x_ix86_tune_memcpy_strategy)
4688 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4689 ix86_parse_stringop_strategy_string (str, false);
4693 if (opts->x_ix86_tune_memset_strategy)
4695 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4696 ix86_parse_stringop_strategy_string (str, true);
/* Save the initial options in case the user does function specific
   options.  */
4703 target_option_default_node = target_option_current_node
4704 = build_target_option_node (opts);
4706 if (opts->x_flag_cf_protection != CF_NONE)
4707 opts->x_flag_cf_protection
4708 = (cf_protection_level) (opts->x_flag_cf_protection | CF_SET);
4710 if (ix86_tune_features [X86_TUNE_AVOID_128FMA_CHAINS])
4711 maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 128,
4712 opts->x_param_values,
4713 opts_set->x_param_values);
4718 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4721 ix86_option_override (void)
4723 ix86_option_override_internal (true, &global_options, &global_options_set);
4726 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4728 ix86_offload_options (void)
4731 return xstrdup ("-foffload-abi=lp64");
4732 return xstrdup ("-foffload-abi=ilp32");
4735 /* Update register usage after having seen the compiler flags. */
4738 ix86_conditional_register_usage (void)
/* If there are no caller-saved registers, preserve all registers
   except fixed_regs and registers used for the function return value,
   since aggregate_value_p checks call_used_regs[regno] on return.  */
4746 if (cfun && cfun->machine->no_caller_saved_registers)
4747 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4748 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
4749 call_used_regs[i] = 0;
4751 /* For 32-bit targets, squash the REX registers. */
4754 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4755 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4756 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4757 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4758 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4759 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4762 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4763 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
4765 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4767 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4769 /* Set/reset conditionally defined registers from
4770 CALL_USED_REGISTERS initializer. */
4771 if (call_used_regs[i] > 1)
4772 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4774 /* Calculate registers of CLOBBERED_REGS register set
4775 as call used registers from GENERAL_REGS register set. */
4776 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4777 && call_used_regs[i])
4778 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
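/* Illustrative sketch (not part of GCC): entries greater than 1 in the
   call-used table encode "clobbered only under some ABIs"; they are
   resolved against the ABI mask exactly as the loop above does.  A
   hypothetical miniature:  */
#if 0
static void
resolve_call_used (unsigned char *call_used, int nregs, unsigned int abi_mask)
{
  for (int i = 0; i < nregs; i++)
    if (call_used[i] > 1)
      /* Keep the register call-used only if its bit matches the ABI.  */
      call_used[i] = (call_used[i] & abi_mask) != 0;
}
#endif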
4781 /* If MMX is disabled, squash the registers. */
4783 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4784 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4785 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4787 /* If SSE is disabled, squash the registers. */
4789 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4790 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4791 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4793 /* If the FPU is disabled, squash the registers. */
4794 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4795 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4796 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4797 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4799 /* If AVX512F is disabled, squash the registers. */
4800 if (! TARGET_AVX512F)
4802 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4803 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4805 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4806 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4810 /* Canonicalize a comparison from one we don't have to one we do have. */
4813 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4814 bool op0_preserve_value)
/* The order of operands in the x87 ficom compare is forced by combine in
   the simplify_comparison () function.  A float operator is treated as
   RTX_OBJ with precedence over other operators and is always put in the
   first place.  Swap the condition and operands to match the ficom
   instruction.  */
4820 if (!op0_preserve_value
4821 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
4823 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
4825 /* We are called only for compares that are split to SAHF instruction.
4826 Ensure that we have setcc/jcc insn for the swapped condition. */
4827 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
4829 std::swap (*op0, *op1);
4830 *code = (int) scode;
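/* Illustrative sketch (not part of GCC): swapping the operands of a
   comparison requires swapping the condition too, since a < b is
   equivalent to b > a.  A hypothetical miniature of swap_condition:  */
#if 0
enum cmp { CMP_LT, CMP_GT, CMP_LE, CMP_GE, CMP_EQ, CMP_NE };

static enum cmp
swap_cmp (enum cmp c)
{
  switch (c)
    {
    case CMP_LT: return CMP_GT;
    case CMP_GT: return CMP_LT;
    case CMP_LE: return CMP_GE;
    case CMP_GE: return CMP_LE;
    default:     return c;	/* EQ and NE are symmetric.  */
    }
}
#endif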
4835 /* Save the current options */
4838 ix86_function_specific_save (struct cl_target_option *ptr,
4839 struct gcc_options *opts)
4841 ptr->arch = ix86_arch;
4842 ptr->schedule = ix86_schedule;
4843 ptr->prefetch_sse = x86_prefetch_sse;
4844 ptr->tune = ix86_tune;
4845 ptr->branch_cost = ix86_branch_cost;
4846 ptr->tune_defaulted = ix86_tune_defaulted;
4847 ptr->arch_specified = ix86_arch_specified;
4848 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4849 ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit;
4850 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4851 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4852 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4853 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4854 ptr->x_ix86_abi = opts->x_ix86_abi;
4855 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4856 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4857 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4858 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4859 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4860 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4861 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4862 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4863 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4864 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4865 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4866 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4867 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4868 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4869 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4870 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4871 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4872 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4873 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4874 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4876 /* The fields are char but the variables are not; make sure the
4877 values fit in the fields. */
4878 gcc_assert (ptr->arch == ix86_arch);
4879 gcc_assert (ptr->schedule == ix86_schedule);
4880 gcc_assert (ptr->tune == ix86_tune);
4881 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4884 /* Restore the current options */
4887 ix86_function_specific_restore (struct gcc_options *opts,
4888 struct cl_target_option *ptr)
4890 enum processor_type old_tune = ix86_tune;
4891 enum processor_type old_arch = ix86_arch;
4892 unsigned HOST_WIDE_INT ix86_arch_mask;
4895 /* We don't change -fPIC. */
4896 opts->x_flag_pic = flag_pic;
4898 ix86_arch = (enum processor_type) ptr->arch;
4899 ix86_schedule = (enum attr_cpu) ptr->schedule;
4900 ix86_tune = (enum processor_type) ptr->tune;
4901 x86_prefetch_sse = ptr->prefetch_sse;
4902 opts->x_ix86_branch_cost = ptr->branch_cost;
4903 ix86_tune_defaulted = ptr->tune_defaulted;
4904 ix86_arch_specified = ptr->arch_specified;
4905 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4906 opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit;
4907 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4908 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4909 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4910 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4911 opts->x_ix86_abi = ptr->x_ix86_abi;
4912 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4913 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4914 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4915 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4916 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4917 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4918 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4919 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4920 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4921 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4922 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4923 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4924 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4925 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4926 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4927 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4928 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4929 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4930 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4931 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4932 ix86_tune_cost = processor_cost_table[ix86_tune];
/* TODO: ix86_cost should be chosen at instruction or function granularity,
   so that for cold code we can use size_cost even in !optimize_size
   compilations.  */
4935 if (opts->x_optimize_size)
4936 ix86_cost = &ix86_size_cost;
4938 ix86_cost = ix86_tune_cost;
4940 /* Recreate the arch feature tests if the arch changed */
4941 if (old_arch != ix86_arch)
4943 ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch;
4944 for (i = 0; i < X86_ARCH_LAST; ++i)
4945 ix86_arch_features[i]
4946 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4949 /* Recreate the tune optimization tests */
4950 if (old_tune != ix86_tune)
4951 set_ix86_tune_features (ix86_tune, false);
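/* Illustrative sketch (not part of GCC): each per-arch feature is stored
   as a bitmask over processors, so switching architectures is just a row
   lookup, as in the loop above.  A hypothetical shape of the data:  */
#if 0
#define N_FEATURES 4

static unsigned long long feature_masks[N_FEATURES]; /* One bit per CPU.  */
static unsigned char features[N_FEATURES];

static void
recompute_features (int arch)
{
  unsigned long long arch_bit = 1ull << arch;
  for (int i = 0; i < N_FEATURES; i++)
    features[i] = (feature_masks[i] & arch_bit) != 0;
}
#endif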
4954 /* Adjust target options after streaming them in. This is mainly about
4955 reconciling them with global options. */
4958 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
/* flag_pic is a global option, but ix86_cmodel is a target-saved option
   partly computed from flag_pic.  If flag_pic is on, adjust x_ix86_cmodel
   for PIC, or error out.  */
4964 switch (ptr->x_ix86_cmodel)
4967 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4971 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4975 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4979 error ("code model %s does not support PIC mode", "kernel");
4986 switch (ptr->x_ix86_cmodel)
4989 ptr->x_ix86_cmodel = CM_SMALL;
4993 ptr->x_ix86_cmodel = CM_MEDIUM;
4997 ptr->x_ix86_cmodel = CM_LARGE;
5005 /* Print the current options */
5008 ix86_function_specific_print (FILE *file, int indent,
5009 struct cl_target_option *ptr)
5012 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2,
5013 ptr->x_target_flags, ptr->x_ix86_target_flags,
5014 NULL, NULL, ptr->x_ix86_fpmath, false);
5016 gcc_assert (ptr->arch < PROCESSOR_max);
5017 fprintf (file, "%*sarch = %d (%s)\n",
5019 ptr->arch, processor_names[ptr->arch]);
5021 gcc_assert (ptr->tune < PROCESSOR_max);
5022 fprintf (file, "%*stune = %d (%s)\n",
5024 ptr->tune, processor_names[ptr->tune]);
5026 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5030 fprintf (file, "%*s%s\n", indent, "", target_string);
5031 free (target_string);
/* Inner function to process the attribute((target(...))); take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */
5041 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5042 struct gcc_options *opts,
5043 struct gcc_options *opts_set,
5044 struct gcc_options *enum_opts_set)
5049 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5050 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5051 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5052 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5053 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5069 enum ix86_opt_type type;
5074 IX86_ATTR_ISA ("pconfig", OPT_mpconfig),
5075 IX86_ATTR_ISA ("wbnoinvd", OPT_mwbnoinvd),
5076 IX86_ATTR_ISA ("sgx", OPT_msgx),
5077 IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps),
5078 IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw),
5079 IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq),
5080 IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2),
5081 IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni),
5082 IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg),
5084 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5085 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5086 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5087 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5088 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5089 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5090 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5091 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5092 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5093 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5094 IX86_ATTR_ISA ("fma", OPT_mfma),
5095 IX86_ATTR_ISA ("xop", OPT_mxop),
5096 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5097 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5098 IX86_ATTR_ISA ("avx", OPT_mavx),
5099 IX86_ATTR_ISA ("sse4", OPT_msse4),
5100 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5101 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5102 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5103 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5104 IX86_ATTR_ISA ("sse3", OPT_msse3),
5105 IX86_ATTR_ISA ("aes", OPT_maes),
5106 IX86_ATTR_ISA ("sha", OPT_msha),
5107 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5108 IX86_ATTR_ISA ("sse2", OPT_msse2),
5109 IX86_ATTR_ISA ("sse", OPT_msse),
5110 IX86_ATTR_ISA ("3dnowa", OPT_m3dnowa),
5111 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5112 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5113 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5114 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5115 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5116 IX86_ATTR_ISA ("adx", OPT_madx),
5117 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5118 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5119 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5120 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5121 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5122 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5123 IX86_ATTR_ISA ("abm", OPT_mabm),
5124 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5125 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5126 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5127 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5128 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5129 IX86_ATTR_ISA ("cx16", OPT_mcx16),
5130 IX86_ATTR_ISA ("sahf", OPT_msahf),
5131 IX86_ATTR_ISA ("movbe", OPT_mmovbe),
5132 IX86_ATTR_ISA ("crc32", OPT_mcrc32),
5133 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5134 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5135 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5136 IX86_ATTR_ISA ("clzero", OPT_mclzero),
5137 IX86_ATTR_ISA ("pku", OPT_mpku),
5138 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5139 IX86_ATTR_ISA ("hle", OPT_mhle),
5140 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5141 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5142 IX86_ATTR_ISA ("rdpid", OPT_mrdpid),
5143 IX86_ATTR_ISA ("gfni", OPT_mgfni),
5144 IX86_ATTR_ISA ("shstk", OPT_mshstk),
5145 IX86_ATTR_ISA ("vaes", OPT_mvaes),
5146 IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq),
5147 IX86_ATTR_ISA ("movdiri", OPT_mmovdiri),
5148 IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b),
5149 IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg),
5150 IX86_ATTR_ISA ("cldemote", OPT_mcldemote),
5151 IX86_ATTR_ISA ("ptwrite", OPT_mptwrite),
5154 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5156 /* string options */
5157 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5158 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
5161 IX86_ATTR_YES ("cld",
5165 IX86_ATTR_NO ("fancy-math-387",
5166 OPT_mfancy_math_387,
5167 MASK_NO_FANCY_MATH_387),
5169 IX86_ATTR_YES ("ieee-fp",
5173 IX86_ATTR_YES ("inline-all-stringops",
5174 OPT_minline_all_stringops,
5175 MASK_INLINE_ALL_STRINGOPS),
5177 IX86_ATTR_YES ("inline-stringops-dynamically",
5178 OPT_minline_stringops_dynamically,
5179 MASK_INLINE_STRINGOPS_DYNAMICALLY),
5181 IX86_ATTR_NO ("align-stringops",
5182 OPT_mno_align_stringops,
5183 MASK_NO_ALIGN_STRINGOPS),
5185 IX86_ATTR_YES ("recip",
5191 /* If this is a list, recurse to get the options. */
5192 if (TREE_CODE (args) == TREE_LIST)
5196 for (; args; args = TREE_CHAIN (args))
5197 if (TREE_VALUE (args)
5198 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
5199 p_strings, opts, opts_set,
5206 else if (TREE_CODE (args) != STRING_CST)
5208 error ("attribute %<target%> argument not a string");
5212 /* Handle multiple arguments separated by commas. */
5213 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
5215 while (next_optstr && *next_optstr != '\0')
5217 char *p = next_optstr;
5219 char *comma = strchr (next_optstr, ',');
5220 const char *opt_string;
5221 size_t len, opt_len;
5226 enum ix86_opt_type type = ix86_opt_unknown;
5232 len = comma - next_optstr;
5233 next_optstr = comma + 1;
5241 /* Recognize no-xxx. */
5242 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
5251 /* Find the option. */
5254 for (i = 0; i < ARRAY_SIZE (attrs); i++)
5256 type = attrs[i].type;
5257 opt_len = attrs[i].len;
5258 if (ch == attrs[i].string[0]
5259 && ((type != ix86_opt_str && type != ix86_opt_enum)
5262 && memcmp (p, attrs[i].string, opt_len) == 0)
5265 mask = attrs[i].mask;
5266 opt_string = attrs[i].string;
5271 /* Process the option. */
5274 error ("attribute(target(\"%s\")) is unknown", orig_p);
5278 else if (type == ix86_opt_isa)
5280 struct cl_decoded_option decoded;
5282 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
5283 ix86_handle_option (opts, opts_set,
5284 &decoded, input_location);
5287 else if (type == ix86_opt_yes || type == ix86_opt_no)
5289 if (type == ix86_opt_no)
5290 opt_set_p = !opt_set_p;
5293 opts->x_target_flags |= mask;
5295 opts->x_target_flags &= ~mask;
5298 else if (type == ix86_opt_str)
5302 error ("option(\"%s\") was already specified", opt_string);
5306 p_strings[opt] = xstrdup (p + opt_len);
5309 else if (type == ix86_opt_enum)
5314 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
5316 set_option (opts, enum_opts_set, opt, value,
5317 p + opt_len, DK_UNSPECIFIED, input_location,
5321 error ("attribute(target(\"%s\")) is unknown", orig_p);
5333 /* Release allocated strings. */
5335 release_options_strings (char **option_strings)
5337 /* Free up memory allocated to hold the strings */
5338 for (unsigned i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5339 free (option_strings[i]);
5342 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
5345 ix86_valid_target_attribute_tree (tree args,
5346 struct gcc_options *opts,
5347 struct gcc_options *opts_set)
5349 const char *orig_arch_string = opts->x_ix86_arch_string;
5350 const char *orig_tune_string = opts->x_ix86_tune_string;
5351 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
5352 int orig_tune_defaulted = ix86_tune_defaulted;
5353 int orig_arch_specified = ix86_arch_specified;
5354 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
5356 struct cl_target_option *def
5357 = TREE_TARGET_OPTION (target_option_default_node);
5358 struct gcc_options enum_opts_set;
5360 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
5362 /* Process each of the options on the chain. */
5363 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
5364 opts_set, &enum_opts_set))
5365 return error_mark_node;
5367 /* If the changed options are different from the default, rerun
5368 ix86_option_override_internal, and then save the options away.
5369 The string options are attribute options, and will be undone
5370 when we copy the save structure. */
5371 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
5372 || opts->x_ix86_isa_flags2 != def->x_ix86_isa_flags2
5373 || opts->x_target_flags != def->x_target_flags
5374 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
5375 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
5376 || enum_opts_set.x_ix86_fpmath)
5378 /* If we are using the default tune= or arch=, undo the string assigned,
5379 and use the default. */
5380 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
5382 opts->x_ix86_arch_string
5383 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);
5385 /* If arch= is set, clear all bits in x_ix86_isa_flags,
5386 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
5387 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
5388 | OPTION_MASK_ABI_64
5389 | OPTION_MASK_ABI_X32
5390 | OPTION_MASK_CODE16);
5391 opts->x_ix86_isa_flags2 = 0;
5393 else if (!orig_arch_specified)
5394 opts->x_ix86_arch_string = NULL;
5396 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5397 opts->x_ix86_tune_string
5398 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
5399 else if (orig_tune_defaulted)
5400 opts->x_ix86_tune_string = NULL;
5402 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5403 if (enum_opts_set.x_ix86_fpmath)
5404 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5406 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5407 bool r = ix86_option_override_internal (false, opts, opts_set);
5410 release_options_strings (option_strings);
5411 return error_mark_node;
5414 /* Add any builtin functions with the new isa if any. */
5415 ix86_add_new_builtins (opts->x_ix86_isa_flags, opts->x_ix86_isa_flags2);
/* Save the current options unless we are validating options for
   #pragma.  */
5419 t = build_target_option_node (opts);
5421 opts->x_ix86_arch_string = orig_arch_string;
5422 opts->x_ix86_tune_string = orig_tune_string;
5423 opts_set->x_ix86_fpmath = orig_fpmath_set;
5425 release_options_strings (option_strings);
5431 /* Hook to validate attribute((target("string"))). */
5434 ix86_valid_target_attribute_p (tree fndecl,
5435 tree ARG_UNUSED (name),
5437 int ARG_UNUSED (flags))
5439 struct gcc_options func_options;
5440 tree new_target, new_optimize;
5443 /* attribute((target("default"))) does nothing, beyond
5444 affecting multi-versioning. */
5445 if (TREE_VALUE (args)
5446 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5447 && TREE_CHAIN (args) == NULL_TREE
5448 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5451 tree old_optimize = build_optimization_node (&global_options);
5453 /* Get the optimization options of the current function. */
5454 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5457 func_optimize = old_optimize;
5459 /* Init func_options. */
5460 memset (&func_options, 0, sizeof (func_options));
5461 init_options_struct (&func_options, NULL);
5462 lang_hooks.init_options_struct (&func_options);
5464 cl_optimization_restore (&func_options,
5465 TREE_OPTIMIZATION (func_optimize));
/* Initialize func_options to the default before its target options can
   be set.  */
5470 TREE_TARGET_OPTION (target_option_default_node));
5472 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5473 &global_options_set);
5475 new_optimize = build_optimization_node (&func_options);
5477 if (new_target == error_mark_node)
5480 else if (fndecl && new_target)
5482 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5484 if (old_optimize != new_optimize)
5485 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5488 finalize_options_struct (&func_options);
5494 /* Hook to determine if one function can safely inline another. */
5497 ix86_can_inline_p (tree caller, tree callee)
5499 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5500 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
/* Changes to those flags can be tolerated for always_inline functions.
   Let's hope the user knows what they are doing.  */
5504 const unsigned HOST_WIDE_INT always_inline_safe_mask
5505 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
5506 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
5507 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
5508 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
5509 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
5510 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
5511 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
5515 callee_tree = target_option_default_node;
5517 caller_tree = target_option_default_node;
5518 if (callee_tree == caller_tree)
5521 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5522 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5525 = (DECL_DISREGARD_INLINE_LIMITS (callee)
5526 && lookup_attribute ("always_inline",
5527 DECL_ATTRIBUTES (callee)));
5529 cgraph_node *callee_node = cgraph_node::get (callee);
/* The callee's ISA options should be a subset of the caller's, i.e. an
   SSE4 function can inline an SSE2 function, but an SSE2 function can't
   inline an SSE4 function.  */
5533 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5534 != callee_opts->x_ix86_isa_flags)
5535 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
5536 != callee_opts->x_ix86_isa_flags2))
5539 /* See if we have the same non-isa options. */
5540 else if ((!always_inline
5541 && caller_opts->x_target_flags != callee_opts->x_target_flags)
5542 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
5543 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
5546 /* See if arch, tune, etc. are the same. */
5547 else if (caller_opts->arch != callee_opts->arch)
5550 else if (!always_inline && caller_opts->tune != callee_opts->tune)
5553 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
/* If the callee doesn't use FP expressions, differences in
   ix86_fpmath can be ignored.  We are called from FEs
   for multi-versioning call optimization, so beware of
   ipa_fn_summaries not being available.  */
5558 && (! ipa_fn_summaries
5559 || ipa_fn_summaries->get (callee_node) == NULL
5560 || ipa_fn_summaries->get (callee_node)->fp_expressions))
5563 else if (!always_inline
5564 && caller_opts->branch_cost != callee_opts->branch_cost)
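/* Illustrative sketch (not part of GCC): the ISA test earlier in this
   function is a subset check - every bit the callee needs must also be
   set in the caller.  A hypothetical helper spelling that out:  */
#if 0
static int
isa_subset_p (unsigned long long caller_isa, unsigned long long callee_isa)
{
  return (caller_isa & callee_isa) == callee_isa;
}
#endif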
5574 /* Remember the last target of ix86_set_current_function. */
5575 static GTY(()) tree ix86_previous_fndecl;
/* Set target globals to the default (or current #pragma GCC target
   if active).  Invalidate the ix86_previous_fndecl cache.  */
5581 ix86_reset_previous_fndecl (void)
5583 tree new_tree = target_option_current_node;
5584 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5585 if (TREE_TARGET_GLOBALS (new_tree))
5586 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5587 else if (new_tree == target_option_default_node)
5588 restore_target_globals (&default_target_globals);
5590 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5591 ix86_previous_fndecl = NULL_TREE;
5594 /* Set the func_type field from the function FNDECL. */
5597 ix86_set_func_type (tree fndecl)
5599 if (cfun->machine->func_type == TYPE_UNKNOWN)
5601 if (lookup_attribute ("interrupt",
5602 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5604 if (ix86_function_naked (fndecl))
5605 error_at (DECL_SOURCE_LOCATION (fndecl),
5606 "interrupt and naked attributes are not compatible");
5609 for (tree arg = DECL_ARGUMENTS (fndecl);
5611 arg = TREE_CHAIN (arg))
5613 cfun->machine->no_caller_saved_registers = true;
5614 cfun->machine->func_type
5615 = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT;
5617 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
/* Only dwarf2out.c can handle -WORD(AP) as a pointer argument.  */
if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG)
  sorry ("only DWARF debug format is supported for interrupt "
	 "service routines");
5626 cfun->machine->func_type = TYPE_NORMAL;
5627 if (lookup_attribute ("no_caller_saved_registers",
5628 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5629 cfun->machine->no_caller_saved_registers = true;
5634 /* Set the indirect_branch_type field from the function FNDECL. */
5637 ix86_set_indirect_branch_type (tree fndecl)
5639 if (cfun->machine->indirect_branch_type == indirect_branch_unset)
5641 tree attr = lookup_attribute ("indirect_branch",
5642 DECL_ATTRIBUTES (fndecl));
5645 tree args = TREE_VALUE (attr);
5648 tree cst = TREE_VALUE (args);
5649 if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0)
5650 cfun->machine->indirect_branch_type = indirect_branch_keep;
5651 else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0)
5652 cfun->machine->indirect_branch_type = indirect_branch_thunk;
5653 else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0)
5654 cfun->machine->indirect_branch_type = indirect_branch_thunk_inline;
5655 else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0)
5656 cfun->machine->indirect_branch_type = indirect_branch_thunk_extern;
5661 cfun->machine->indirect_branch_type = ix86_indirect_branch;
5663 /* -mcmodel=large is not compatible with -mindirect-branch=thunk
5664 nor -mindirect-branch=thunk-extern. */
5665 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
5666 && ((cfun->machine->indirect_branch_type
5667 == indirect_branch_thunk_extern)
5668 || (cfun->machine->indirect_branch_type
5669 == indirect_branch_thunk)))
5670 error ("%<-mindirect-branch=%s%> and %<-mcmodel=large%> are not "
5672 ((cfun->machine->indirect_branch_type
5673 == indirect_branch_thunk_extern)
5674 ? "thunk-extern" : "thunk"));
5677 if (cfun->machine->function_return_type == indirect_branch_unset)
5679 tree attr = lookup_attribute ("function_return",
5680 DECL_ATTRIBUTES (fndecl));
5683 tree args = TREE_VALUE (attr);
5686 tree cst = TREE_VALUE (args);
5687 if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0)
5688 cfun->machine->function_return_type = indirect_branch_keep;
5689 else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0)
5690 cfun->machine->function_return_type = indirect_branch_thunk;
5691 else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0)
5692 cfun->machine->function_return_type = indirect_branch_thunk_inline;
5693 else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0)
5694 cfun->machine->function_return_type = indirect_branch_thunk_extern;
5699 cfun->machine->function_return_type = ix86_function_return;
5701 /* -mcmodel=large is not compatible with -mfunction-return=thunk
5702 nor -mfunction-return=thunk-extern. */
5703 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
5704 && ((cfun->machine->function_return_type
5705 == indirect_branch_thunk_extern)
5706 || (cfun->machine->function_return_type
5707 == indirect_branch_thunk)))
5708 error ("%<-mfunction-return=%s%> and %<-mcmodel=large%> are not "
5710 ((cfun->machine->function_return_type
5711 == indirect_branch_thunk_extern)
5712 ? "thunk-extern" : "thunk"));
5716 /* Establish appropriate back-end context for processing the function
5717 FNDECL. The argument might be NULL to indicate processing at top
5718 level, outside of any function scope. */
5720 ix86_set_current_function (tree fndecl)
5722 /* Only change the context if the function changes. This hook is called
5723 several times in the course of compiling a function, and we don't want to
5724 slow things down too much or call target_reinit when it isn't safe. */
5725 if (fndecl == ix86_previous_fndecl)
/* There may be two function bodies for the same function FNDECL:
   one is extern inline and one isn't.  Call ix86_set_func_type
   to set the func_type field.  */
5730 if (fndecl != NULL_TREE)
5732 ix86_set_func_type (fndecl);
5733 ix86_set_indirect_branch_type (fndecl);
5739 if (ix86_previous_fndecl == NULL_TREE)
5740 old_tree = target_option_current_node;
5741 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5742 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5744 old_tree = target_option_default_node;
5746 if (fndecl == NULL_TREE)
5748 if (old_tree != target_option_current_node)
5749 ix86_reset_previous_fndecl ();
5753 ix86_set_func_type (fndecl);
5754 ix86_set_indirect_branch_type (fndecl);
5756 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5757 if (new_tree == NULL_TREE)
5758 new_tree = target_option_default_node;
5760 if (old_tree != new_tree)
5762 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5763 if (TREE_TARGET_GLOBALS (new_tree))
5764 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5765 else if (new_tree == target_option_default_node)
5766 restore_target_globals (&default_target_globals);
5768 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5770 ix86_previous_fndecl = fndecl;
5772 static bool prev_no_caller_saved_registers;
/* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
   Avoid expensive re-initialization of init_regs each time we switch
   function context.  */
5778 && (call_used_regs[SI_REG]
5779 == (cfun->machine->call_abi == MS_ABI)))
5781 /* Need to re-initialize init_regs if caller-saved registers are
5783 else if (prev_no_caller_saved_registers
5784 != cfun->machine->no_caller_saved_registers)
5787 if (cfun->machine->func_type != TYPE_NORMAL
5788 || cfun->machine->no_caller_saved_registers)
/* Don't allow SSE, MMX, or x87 instructions, since they
   may change the processor state.  */
5795 else if (TARGET_MMX)
5797 else if (TARGET_80387)
5803 if (cfun->machine->func_type != TYPE_NORMAL)
5804 sorry ("%s instructions aren't allowed in %s service routine",
5805 isa, (cfun->machine->func_type == TYPE_EXCEPTION
5806 ? "exception" : "interrupt"));
5808 sorry ("%s instructions aren't allowed in function with "
5809 "no_caller_saved_registers attribute", isa);
5810 /* Don't issue the same error twice. */
5811 cfun->machine->func_type = TYPE_NORMAL;
5812 cfun->machine->no_caller_saved_registers = false;
5816 prev_no_caller_saved_registers
5817 = cfun->machine->no_caller_saved_registers;
5821 /* Return true if this goes in large data/bss. */
5824 ix86_in_large_data_p (tree exp)
5826 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5829 if (exp == NULL_TREE)
5832 /* Functions are never large data. */
5833 if (TREE_CODE (exp) == FUNCTION_DECL)
5836 /* Automatic variables are never large data. */
5837 if (VAR_P (exp) && !is_global_var (exp))
5840 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
5842 const char *section = DECL_SECTION_NAME (exp);
5843 if (strcmp (section, ".ldata") == 0
5844 || strcmp (section, ".lbss") == 0)
5850 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
/* If this is an incomplete type with size 0, then we can't put it
   in data because it might be too big when completed.  Also,
   int_size_in_bytes returns -1 if the size can vary or is larger than
   an integer, in which case it is also safer to assume that it goes
   into large data.  */
5864 /* i386-specific section flag to mark large sections. */
5865 #define SECTION_LARGE SECTION_MACH_DEP
5867 /* Switch to the appropriate section for output of DECL.
5868 DECL is either a `VAR_DECL' node or a constant of some sort.
5869 RELOC indicates whether forming the initial value of DECL requires
5870 link-time relocations. */
5872 ATTRIBUTE_UNUSED static section *
5873 x86_64_elf_select_section (tree decl, int reloc,
5874 unsigned HOST_WIDE_INT align)
5876 if (ix86_in_large_data_p (decl))
5878 const char *sname = NULL;
5879 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
5880 switch (categorize_decl_for_section (decl, reloc))
5885 case SECCAT_DATA_REL:
5886 sname = ".ldata.rel";
5888 case SECCAT_DATA_REL_LOCAL:
5889 sname = ".ldata.rel.local";
5891 case SECCAT_DATA_REL_RO:
5892 sname = ".ldata.rel.ro";
5894 case SECCAT_DATA_REL_RO_LOCAL:
5895 sname = ".ldata.rel.ro.local";
5899 flags |= SECTION_BSS;
5902 case SECCAT_RODATA_MERGE_STR:
5903 case SECCAT_RODATA_MERGE_STR_INIT:
5904 case SECCAT_RODATA_MERGE_CONST:
5906 flags &= ~SECTION_WRITE;
5908 case SECCAT_SRODATA:
/* We don't split these for the medium model.  Place them into
   default sections and hope for the best.  */
5921 /* We might get called with string constants, but get_named_section
5922 doesn't like them as they are not DECLs. Also, we need to set
5923 flags in that case. */
5925 return get_section (sname, flags, NULL);
5926 return get_named_section (decl, sname, reloc);
5929 return default_elf_select_section (decl, reloc, align);
5932 /* Select a set of attributes for section NAME based on the properties
5933 of DECL and whether or not RELOC indicates that DECL's initializer
5934 might contain runtime relocations. */
5936 static unsigned int ATTRIBUTE_UNUSED
5937 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5939 unsigned int flags = default_section_type_flags (decl, name, reloc);
5941 if (ix86_in_large_data_p (decl))
5942 flags |= SECTION_LARGE;
5944 if (decl == NULL_TREE
5945 && (strcmp (name, ".ldata.rel.ro") == 0
5946 || strcmp (name, ".ldata.rel.ro.local") == 0))
5947 flags |= SECTION_RELRO;
if (strcmp (name, ".lbss") == 0
    || strncmp (name, ".lbss.", sizeof (".lbss.") - 1) == 0
    || strncmp (name, ".gnu.linkonce.lb.",
		sizeof (".gnu.linkonce.lb.") - 1) == 0)
5952 flags |= SECTION_BSS;
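/* Illustrative sketch (not part of GCC): prefix tests like the ones above
   are less error-prone written with sizeof so the length always tracks
   the string literal.  A hypothetical helper:  */
#if 0
#include <string.h>

#define HAS_PREFIX(s, pfx) (strncmp ((s), (pfx), sizeof (pfx) - 1) == 0)

static int
is_large_bss_section (const char *name)
{
  return strcmp (name, ".lbss") == 0
	 || HAS_PREFIX (name, ".lbss.")
	 || HAS_PREFIX (name, ".gnu.linkonce.lb.");
}
#endif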
5957 /* Build up a unique section name, expressed as a
5958 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5959 RELOC indicates whether the initial value of EXP requires
5960 link-time relocations. */
5962 static void ATTRIBUTE_UNUSED
5963 x86_64_elf_unique_section (tree decl, int reloc)
5965 if (ix86_in_large_data_p (decl))
5967 const char *prefix = NULL;
5968 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5969 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5971 switch (categorize_decl_for_section (decl, reloc))
5974 case SECCAT_DATA_REL:
5975 case SECCAT_DATA_REL_LOCAL:
5976 case SECCAT_DATA_REL_RO:
5977 case SECCAT_DATA_REL_RO_LOCAL:
5978 prefix = one_only ? ".ld" : ".ldata";
5981 prefix = one_only ? ".lb" : ".lbss";
5984 case SECCAT_RODATA_MERGE_STR:
5985 case SECCAT_RODATA_MERGE_STR_INIT:
5986 case SECCAT_RODATA_MERGE_CONST:
5987 prefix = one_only ? ".lr" : ".lrodata";
5989 case SECCAT_SRODATA:
/* We don't split these for the medium model.  Place them into
   default sections and hope for the best.  */
6002 const char *name, *linkonce;
6005 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6006 name = targetm.strip_name_encoding (name);
6008 /* If we're using one_only, then there needs to be a .gnu.linkonce
6009 prefix to the section name. */
6010 linkonce = one_only ? ".gnu.linkonce" : "";
6012 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6014 set_decl_section_name (decl, string);
6018 default_unique_section (decl, reloc);
6021 #ifdef COMMON_ASM_OP
#ifndef LARGECOMM_SECTION_ASM_OP
#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
#endif
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
   directive for large data.  */
6033 x86_elf_aligned_decl_common (FILE *file, tree decl,
6034 const char *name, unsigned HOST_WIDE_INT size,
6037 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6038 && size > (unsigned int)ix86_section_threshold)
6040 switch_to_section (get_named_section (decl, ".lbss", 0));
6041 fputs (LARGECOMM_SECTION_ASM_OP, file);
6044 fputs (COMMON_ASM_OP, file);
6045 assemble_name (file, name);
6046 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6047 size, align / BITS_PER_UNIT);
6051 /* Utility function for targets to use in implementing
6052 ASM_OUTPUT_ALIGNED_BSS. */
6055 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6056 unsigned HOST_WIDE_INT size, int align)
6058 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6059 && size > (unsigned int)ix86_section_threshold)
6060 switch_to_section (get_named_section (decl, ".lbss", 0));
6062 switch_to_section (bss_section);
6063 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6064 #ifdef ASM_DECLARE_OBJECT_NAME
6065 last_assemble_variable_decl = decl;
6066 ASM_DECLARE_OBJECT_NAME (file, name, decl);
6068 /* Standard thing is just output label for the object. */
6069 ASM_OUTPUT_LABEL (file, name);
6070 #endif /* ASM_DECLARE_OBJECT_NAME */
6071 ASM_OUTPUT_SKIP (file, size ? size : 1);
6074 /* Decide whether we must probe the stack before any space allocation
6075 on this target. It's essentially TARGET_STACK_PROBE except when
6076 -fstack-check causes the stack to be already probed differently. */
6079 ix86_target_stack_probe (void)
6081 /* Do not probe the stack twice if static stack checking is enabled. */
6082 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6085 return TARGET_STACK_PROBE;
6088 /* Decide whether we can make a sibling call to a function. DECL is the
6089 declaration of the function being targeted by the call and EXP is the
6090 CALL_EXPR representing the call. */
6093 ix86_function_ok_for_sibcall (tree decl, tree exp)
6095 tree type, decl_or_type;
6097 bool bind_global = decl && !targetm.binds_local_p (decl);
6099 if (ix86_function_naked (current_function_decl))
6102 /* Sibling call isn't OK if there are no caller-saved registers
6103 since all registers must be preserved before return. */
6104 if (cfun->machine->no_caller_saved_registers)
6107 /* If we are generating position-independent code, we cannot sibcall
6108 optimize direct calls to global functions, as the PLT requires
6109 %ebx be live. (Darwin does not have a PLT.) */
6117 /* If we need to align the outgoing stack, then sibcalling would
6118 unalign the stack, which may break the called function. */
6119 if (ix86_minimum_incoming_stack_boundary (true)
6120 < PREFERRED_STACK_BOUNDARY)
6125 decl_or_type = decl;
6126 type = TREE_TYPE (decl);
6130 /* We're looking at the CALL_EXPR, we need the type of the function. */
6131 type = CALL_EXPR_FN (exp); /* pointer expression */
6132 type = TREE_TYPE (type); /* pointer type */
6133 type = TREE_TYPE (type); /* function type */
6134 decl_or_type = type;
6137 /* Check that the return value locations are the same. For example,
6138 if we are returning floats on the 80387 register stack, we cannot
6139 make a sibcall from a function that doesn't return a float to a
6140 function that does or, conversely, from a function that does return
6141 a float to a function that doesn't; the necessary stack adjustment
6142 would not be executed. This is also the place we notice
6143 differences in the return value ABI. Note that it is ok for one
6144 of the functions to have void return type as long as the return
6145 value of the other is passed in a register. */
6146 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6147 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6149 if (STACK_REG_P (a) || STACK_REG_P (b))
6151 if (!rtx_equal_p (a, b))
6154 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6156 else if (!rtx_equal_p (a, b))
6161 /* The SYSV ABI has more call-clobbered registers;
6162 disallow sibcalls from MS to SYSV. */
6163 if (cfun->machine->call_abi == MS_ABI
6164 && ix86_function_type_abi (type) == SYSV_ABI)
6169 /* If this call is indirect, we'll need to be able to use a
6170 call-clobbered register for the address of the target function.
6171 Make sure that all such registers are not used for passing
6172 parameters. Note that DLLIMPORT functions and calls to global
6173 functions via GOT slots are indirect. */
6175 || (bind_global && flag_pic && !flag_plt)
6176 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
6177 || flag_force_indirect_call)
6179 /* Check if regparm >= 3 since arg_reg_available is set to
6180 false if regparm == 0. If regparm is 1 or 2, there is
6181 always a call-clobbered register available.
6183 ??? The symbol indirect call doesn't need a call-clobbered
6184 register. But we don't know if this is a symbol indirect
6185 call or not here. */
6186 if (ix86_function_regparm (type, decl) >= 3
6187 && !cfun->machine->arg_reg_available)
6192 /* Otherwise okay. That also includes certain types of indirect calls. */
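/* Editor's worked example of the return-value check above (hypothetical
   user code): on ia32, where doubles return on the x87 register stack,

       double g (void);
       int f (void) { return (int) g (); }

   f cannot jump to g as a sibcall; g would leave its result in st(0),
   and the stack adjustment that f must perform after the call would
   never execute.  */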
6196 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6197 and "sseregparm" calling convention attributes;
6198 arguments as in struct attribute_spec.handler. */
6201 ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
6204 if (TREE_CODE (*node) != FUNCTION_TYPE
6205 && TREE_CODE (*node) != METHOD_TYPE
6206 && TREE_CODE (*node) != FIELD_DECL
6207 && TREE_CODE (*node) != TYPE_DECL)
6209 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6211 *no_add_attrs = true;
6215 /* Can combine regparm with all attributes but fastcall and thiscall. */
6216 if (is_attribute_p ("regparm", name))
6220 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6222 error ("fastcall and regparm attributes are not compatible");
6225 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6227 error ("regparam and thiscall attributes are not compatible");
6230 cst = TREE_VALUE (args);
6231 if (TREE_CODE (cst) != INTEGER_CST)
6233 warning (OPT_Wattributes,
6234 "%qE attribute requires an integer constant argument",
6236 *no_add_attrs = true;
6238 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6240 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6242 *no_add_attrs = true;
6250 /* Do not warn when emulating the MS ABI. */
6251 if ((TREE_CODE (*node) != FUNCTION_TYPE
6252 && TREE_CODE (*node) != METHOD_TYPE)
6253 || ix86_function_type_abi (*node) != MS_ABI)
6254 warning (OPT_Wattributes, "%qE attribute ignored",
6256 *no_add_attrs = true;
6260 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6261 if (is_attribute_p ("fastcall", name))
6263 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6265 error ("fastcall and cdecl attributes are not compatible");
6267 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6269 error ("fastcall and stdcall attributes are not compatible");
6271 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6273 error ("fastcall and regparm attributes are not compatible");
6275 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6277 error ("fastcall and thiscall attributes are not compatible");
6281 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
6283 else if (is_attribute_p ("stdcall", name))
6285 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6287 error ("stdcall and cdecl attributes are not compatible");
6289 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6291 error ("stdcall and fastcall attributes are not compatible");
6293 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6295 error ("stdcall and thiscall attributes are not compatible");
6299 /* Can combine cdecl with regparm and sseregparm. */
6300 else if (is_attribute_p ("cdecl", name))
6302 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6304 error ("stdcall and cdecl attributes are not compatible");
6306 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6308 error ("fastcall and cdecl attributes are not compatible");
6310 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6312 error ("cdecl and thiscall attributes are not compatible");
6315 else if (is_attribute_p ("thiscall", name))
6317 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6318 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6320 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6322 error ("stdcall and thiscall attributes are not compatible");
6324 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6326 error ("fastcall and thiscall attributes are not compatible");
6328 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6330 error ("cdecl and thiscall attributes are not compatible");
6334 /* Can combine sseregparm with all attributes. */
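/* Editor's illustration of the combinations accepted and rejected above
   (hypothetical user declarations):

       void __attribute__((stdcall, sseregparm)) ok (float);      /- accepted -/
       void __attribute__((regparm(3))) also_ok (int, int, int);  /- accepted -/
       void __attribute__((fastcall, regparm(2))) bad (int);      /- error -/
       void __attribute__((stdcall, cdecl)) worse (void);         /- error -/  */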
6339 /* The transactional memory builtins are implicitly regparm or fastcall
6340 depending on the ABI. Override the generic do-nothing attribute that
6341 these builtins were declared with, and replace it with one of the two
6342 attributes that we expect elsewhere. */
6345 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6346 int flags, bool *no_add_attrs)
6350 /* In no case do we want to add the placeholder attribute. */
6351 *no_add_attrs = true;
6353 /* The 64-bit ABI is unchanged for transactional memory. */
6357 /* ??? Is there a better way to validate 32-bit windows? We have
6358 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
6359 if (CHECK_STACK_LIMIT > 0)
6360 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
6363 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6364 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
6366 decl_attributes (node, alt, flags);
6371 /* This function determines the calling convention from TYPE. */
6374 ix86_get_callcvt (const_tree type)
6376 unsigned int ret = 0;
6381 return IX86_CALLCVT_CDECL;
6383 attrs = TYPE_ATTRIBUTES (type);
6384 if (attrs != NULL_TREE)
6386 if (lookup_attribute ("cdecl", attrs))
6387 ret |= IX86_CALLCVT_CDECL;
6388 else if (lookup_attribute ("stdcall", attrs))
6389 ret |= IX86_CALLCVT_STDCALL;
6390 else if (lookup_attribute ("fastcall", attrs))
6391 ret |= IX86_CALLCVT_FASTCALL;
6392 else if (lookup_attribute ("thiscall", attrs))
6393 ret |= IX86_CALLCVT_THISCALL;
6395 /* Regparm isn't allowed for thiscall and fastcall. */
6396 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6398 if (lookup_attribute ("regparm", attrs))
6399 ret |= IX86_CALLCVT_REGPARM;
6400 if (lookup_attribute ("sseregparm", attrs))
6401 ret |= IX86_CALLCVT_SSEREGPARM;
6404 if (IX86_BASE_CALLCVT(ret) != 0)
6408 is_stdarg = stdarg_p (type);
6409 if (TARGET_RTD && !is_stdarg)
6410 return IX86_CALLCVT_STDCALL | ret;
6414 || TREE_CODE (type) != METHOD_TYPE
6415 || ix86_function_type_abi (type) != MS_ABI)
6416 return IX86_CALLCVT_CDECL | ret;
6418 return IX86_CALLCVT_THISCALL;
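/* Editor's note: the TARGET_RTD path above implements -mrtd, which makes
   stdcall the default for non-variadic functions.  For example
   (hypothetical declarations):

       int f (int);        /- with -mrtd: callee pops, IX86_CALLCVT_STDCALL -/
       int g (int, ...);   /- variadic: stays caller pops, IX86_CALLCVT_CDECL -/  */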
6421 /* Return 0 if the attributes for two types are incompatible, 1 if they
6422 are compatible, and 2 if they are nearly compatible (which causes a
6423 warning to be generated). */
6426 ix86_comp_type_attributes (const_tree type1, const_tree type2)
6428 unsigned int ccvt1, ccvt2;
6430 if (TREE_CODE (type1) != FUNCTION_TYPE
6431 && TREE_CODE (type1) != METHOD_TYPE)
6434 ccvt1 = ix86_get_callcvt (type1);
6435 ccvt2 = ix86_get_callcvt (type2);
6438 if (ix86_function_regparm (type1, NULL)
6439 != ix86_function_regparm (type2, NULL))
6445 /* Return the regparm value for a function with the indicated TYPE and DECL.
6446 DECL may be NULL when calling function indirectly
6447 or considering a libcall. */
6450 ix86_function_regparm (const_tree type, const_tree decl)
6457 return (ix86_function_type_abi (type) == SYSV_ABI
6458 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6459 ccvt = ix86_get_callcvt (type);
6460 regparm = ix86_regparm;
6462 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
6464 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
6467 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
6471 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6473 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6476 /* Use register calling convention for local functions when possible. */
6478 && TREE_CODE (decl) == FUNCTION_DECL)
6480 cgraph_node *target = cgraph_node::get (decl);
6482 target = target->function_symbol ();
6484 /* Caller and callee must agree on the calling convention, so
6485 checking just the optimize flag here would mean that with
6486 __attribute__((optimize (...))) the caller could use the regparm
6487 convention and the callee not, or vice versa. Instead look at
6488 whether the callee itself is optimized or not. */
6489 if (target && opt_for_fn (target->decl, optimize)
6490 && !(profile_flag && !flag_fentry))
6492 cgraph_local_info *i = &target->local;
6493 if (i && i->local && i->can_change_signature)
6495 int local_regparm, globals = 0, regno;
6497 /* Make sure no regparm register is taken by a
6498 fixed register variable. */
6499 for (local_regparm = 0; local_regparm < REGPARM_MAX;
6501 if (fixed_regs[local_regparm])
6504 /* We don't want to use regparm(3) for nested functions as
6505 these use a static chain pointer in the third argument. */
6506 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
6509 /* Save a register for the split stack. */
6510 if (flag_split_stack)
6512 if (local_regparm == 3)
6514 else if (local_regparm == 2
6515 && DECL_STATIC_CHAIN (target->decl))
6519 /* Each fixed register usage increases register pressure,
6520 so fewer registers should be used for argument passing.
6521 This functionality can be overridden by an explicit
6523 for (regno = AX_REG; regno <= DI_REG; regno++)
6524 if (fixed_regs[regno])
6528 = globals < local_regparm ? local_regparm - globals : 0;
6530 if (local_regparm > regparm)
6531 regparm = local_regparm;
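/* Editor's illustration of the local-function promotion above
   (hypothetical translation unit, compiled with -O2 on ia32):

       static int add3 (int a, int b, int c) { return a + b + c; }

   If add3 is local, its signature may change, and no regparm register
   is fixed or reserved, its three arguments may be passed in EAX, EDX
   and ECX instead of on the stack.  */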
6539 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
6540 DFmode (2) arguments in SSE registers for a function with the
6541 indicated TYPE and DECL. DECL may be NULL when calling function
6542 indirectly or considering a libcall. Return -1 if any FP parameter
6543 should be rejected by error. This is used in siutation we imply SSE
6544 calling convetion but the function is called from another function with
6545 SSE disabled. Otherwise return 0. */
6548 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
6550 gcc_assert (!TARGET_64BIT);
6552 /* Use SSE registers to pass SFmode and DFmode arguments if requested
6553 by the sseregparm attribute. */
6554 if (TARGET_SSEREGPARM
6555 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
6562 error ("calling %qD with attribute sseregparm without "
6563 "SSE/SSE2 enabled", decl);
6565 error ("calling %qT with attribute sseregparm without "
6566 "SSE/SSE2 enabled", type);
6577 cgraph_node *target = cgraph_node::get (decl);
6579 target = target->function_symbol ();
6581 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
6582 (and DFmode for SSE2) arguments in SSE registers. */
6584 /* TARGET_SSE_MATH */
6585 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
6586 && opt_for_fn (target->decl, optimize)
6587 && !(profile_flag && !flag_fentry))
6589 cgraph_local_info *i = &target->local;
6590 if (i && i->local && i->can_change_signature)
6592 /* Refuse to produce wrong code when local function with SSE enabled
6593 is called from SSE disabled function.
6594 FIXME: We need a way to detect these cases cross-ltrans partition
6595 and avoid using SSE calling conventions on local functions called
6596 from function with SSE disabled. For now at least delay the
6597 warning until we know we are going to produce wrong code.
6599 if (!TARGET_SSE && warn)
6601 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
6602 ->x_ix86_isa_flags) ? 2 : 1;
6609 /* Return true if EAX is live at the start of the function. Used by
6610 ix86_expand_prologue to determine if we need special help before
6611 calling allocate_stack_worker. */
6614 ix86_eax_live_at_start_p (void)
6616 /* Cheat. Don't bother working forward from ix86_function_regparm
6617 to the function type to whether an actual argument is located in
6618 eax. Instead just look at cfg info, which is still close enough
6619 to correct at this point. This gives false positives for broken
6620 functions that might use uninitialized data that happens to be
6621 allocated in eax, but who cares? */
6622 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
6626 ix86_keep_aggregate_return_pointer (tree fntype)
6632 attr = lookup_attribute ("callee_pop_aggregate_return",
6633 TYPE_ATTRIBUTES (fntype));
6635 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6637 /* For 32-bit MS-ABI the default is to keep the aggregate return pointer. */
6639 if (ix86_function_type_abi (fntype) == MS_ABI)
6642 return KEEP_AGGREGATE_RETURN_POINTER != 0;
6645 /* Value is the number of bytes of arguments automatically
6646 popped when returning from a subroutine call.
6647 FUNDECL is the declaration node of the function (as a tree),
6648 FUNTYPE is the data type of the function (as a tree),
6649 or for a library call it is an identifier node for the subroutine name.
6650 SIZE is the number of bytes of arguments passed on the stack.
6652 On the 80386, the RTD insn may be used to pop them if the number
6653 of args is fixed, but if the number is variable then the caller
6654 must pop them all. RTD can't be used for library calls now
6655 because the library is compiled with the Unix compiler.
6656 Use of RTD is a selectable option, since it is incompatible with
6657 standard Unix calling sequences. If the option is not selected,
6658 the caller must always pop the args.
6660 The attribute stdcall is equivalent to RTD on a per module basis. */
6663 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
6667 /* None of the 64-bit ABIs pop arguments. */
6671 ccvt = ix86_get_callcvt (funtype);
6673 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6674 | IX86_CALLCVT_THISCALL)) != 0
6675 && ! stdarg_p (funtype))
6678 /* Lose any fake structure return argument if it is passed on the stack. */
6679 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6680 && !ix86_keep_aggregate_return_pointer (funtype))
6682 int nregs = ix86_function_regparm (funtype, fundecl);
6684 return GET_MODE_SIZE (Pmode);
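/* Editor's illustration (hypothetical ia32 declarations):

       int __attribute__((stdcall)) f (int, int);  /- returns with "ret $8" -/
       int g (int, int);                           /- cdecl: plain "ret" -/
       int h (int, ...);                           /- variadic: caller pops -/

   ix86_return_pops_args returns 8 for f and 0 for the other two.  */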
6690 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6693 ix86_legitimate_combined_insn (rtx_insn *insn)
6697 /* Check operand constraints in case hard registers were propagated
6698 into insn pattern. This check prevents combine pass from
6699 generating insn patterns with invalid hard register operands.
6700 These invalid insns can eventually confuse reload to error out
6701 with a spill failure. See also PRs 46829 and 46843. */
6703 gcc_assert (INSN_CODE (insn) >= 0);
6705 extract_insn (insn);
6706 preprocess_constraints (insn);
6708 int n_operands = recog_data.n_operands;
6709 int n_alternatives = recog_data.n_alternatives;
6710 for (i = 0; i < n_operands; i++)
6712 rtx op = recog_data.operand[i];
6713 machine_mode mode = GET_MODE (op);
6714 const operand_alternative *op_alt;
6719 /* A unary operator may be accepted by the predicate, but it
6720 is irrelevant for matching constraints. */
6726 if (REG_P (SUBREG_REG (op))
6727 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6728 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6729 GET_MODE (SUBREG_REG (op)),
6732 op = SUBREG_REG (op);
6735 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6738 op_alt = recog_op_alt;
6740 /* Operand has no constraints, anything is OK. */
6741 win = !n_alternatives;
6743 alternative_mask preferred = get_preferred_alternatives (insn);
6744 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6746 if (!TEST_BIT (preferred, j))
6748 if (op_alt[i].anything_ok
6749 || (op_alt[i].matches != -1
6751 (recog_data.operand[i],
6752 recog_data.operand[op_alt[i].matches]))
6753 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6767 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6769 static unsigned HOST_WIDE_INT
6770 ix86_asan_shadow_offset (void)
6772 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6773 : HOST_WIDE_INT_C (0x7fff8000))
6774 : (HOST_WIDE_INT_1 << 29);
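/* Editor's note: the offsets above feed AddressSanitizer's usual
   shadow mapping,

       shadow_address = (address >> 3) + ix86_asan_shadow_offset ()

   e.g. 0x7fff8000 for 64-bit LP64 Linux and 1 << 29 for 32-bit.  */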
6777 /* Argument support functions. */
6779 /* Return true when a register may be used to pass function parameters. */
6781 ix86_function_arg_regno_p (int regno)
6784 enum calling_abi call_abi;
6785 const int *parm_regs;
6790 return (regno < REGPARM_MAX
6791 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6793 return (regno < REGPARM_MAX
6794 || (TARGET_MMX && MMX_REGNO_P (regno)
6795 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6796 || (TARGET_SSE && SSE_REGNO_P (regno)
6797 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6800 if (TARGET_SSE && SSE_REGNO_P (regno)
6801 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6804 /* TODO: The function should depend on the current function ABI, but
6805 builtins.c would need updating then. Therefore we use the default ABI. */
6807 call_abi = ix86_cfun_abi ();
6809 /* RAX is used as hidden argument to va_arg functions. */
6810 if (call_abi == SYSV_ABI && regno == AX_REG)
6813 if (call_abi == MS_ABI)
6814 parm_regs = x86_64_ms_abi_int_parameter_registers;
6816 parm_regs = x86_64_int_parameter_registers;
6818 for (i = 0; i < (call_abi == MS_ABI
6819 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6820 if (regno == parm_regs[i])
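/* Editor's note: for the 64-bit SysV ABI the integer parameter registers
   scanned above are RDI, RSI, RDX, RCX, R8 and R9 (with RAX additionally
   allowed as the hidden varargs SSE count); the MS ABI uses only RCX,
   RDX, R8 and R9.  */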
6825 /* Return true if we do not know how to pass TYPE solely in registers. */
6828 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6830 if (must_pass_in_stack_var_size_or_pad (mode, type))
6833 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6834 The layout_type routine is crafty and tries to trick us into passing
6835 currently unsupported vector types on the stack by using TImode. */
6836 return (!TARGET_64BIT && mode == TImode
6837 && type && TREE_CODE (type) != VECTOR_TYPE);
6840 /* Return the size, in bytes, of the area reserved for arguments passed
6841 in registers for the function represented by FNDECL, depending on the ABI used. */
6844 ix86_reg_parm_stack_space (const_tree fndecl)
6846 enum calling_abi call_abi = SYSV_ABI;
6847 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6848 call_abi = ix86_function_abi (fndecl);
6850 call_abi = ix86_function_type_abi (fndecl);
6851 if (TARGET_64BIT && call_abi == MS_ABI)
6856 /* We add this as a workaround in order to use the libc_has_function hook in i386.md. */
6859 ix86_libc_has_function (enum function_class fn_class)
6861 return targetm.libc_has_function (fn_class);
6864 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
6865 specifying the call ABI used. */
6867 ix86_function_type_abi (const_tree fntype)
6869 enum calling_abi abi = ix86_abi;
6871 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
6875 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6878 if (TARGET_X32 && !warned)
6880 error ("X32 does not support ms_abi attribute");
6886 else if (abi == MS_ABI
6887 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6893 static enum calling_abi
6894 ix86_function_abi (const_tree fndecl)
6896 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
6899 /* Return SYSV_ABI or MS_ABI, depending on CFUN,
6900 specifying the call ABI used. */
6902 ix86_cfun_abi (void)
6904 return cfun ? cfun->machine->call_abi : ix86_abi;
6908 ix86_function_ms_hook_prologue (const_tree fn)
6910 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6912 if (decl_function_context (fn) != NULL_TREE)
6913 error_at (DECL_SOURCE_LOCATION (fn),
6914 "ms_hook_prologue is not compatible with nested function");
6922 ix86_function_naked (const_tree fn)
6924 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
6930 /* Write the extra assembler code needed to declare a function properly. */
6933 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6936 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6940 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6941 unsigned int filler_cc = 0xcccccccc;
6943 for (i = 0; i < filler_count; i += 4)
6944 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6947 #ifdef SUBTARGET_ASM_UNWIND_INIT
6948 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6951 ASM_OUTPUT_LABEL (asm_out_file, fname);
6953 /* Output magic byte marker, if hot-patch attribute is set. */
6958 /* leaq [%rsp + 0], %rsp */
6959 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
6964 /* movl.s %edi, %edi
6965 push %ebp
6966 movl.s %esp, %ebp */
6967 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
6972 /* Implementation of the call ABI switching target hook. The call
6973 register sets specific to FNDECL are set up here. See also
6974 ix86_conditional_register_usage for more details. */
6976 ix86_call_abi_override (const_tree fndecl)
6978 cfun->machine->call_abi = ix86_function_abi (fndecl);
6981 /* Return true if a pseudo register should be created and used to hold
6982 the GOT address for PIC code. */
6984 ix86_use_pseudo_pic_reg (void)
6987 && (ix86_cmodel == CM_SMALL_PIC
6994 /* Initialize large model PIC register. */
6997 ix86_init_large_pic_reg (unsigned int tmp_regno)
6999 rtx_code_label *label;
7002 gcc_assert (Pmode == DImode);
7003 label = gen_label_rtx ();
7005 LABEL_PRESERVE_P (label) = 1;
7006 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
7007 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
7008 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7010 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7011 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7012 pic_offset_table_rtx, tmp_reg));
7013 const char *name = LABEL_NAME (label);
7014 PUT_CODE (label, NOTE);
7015 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
7016 NOTE_DELETED_LABEL_NAME (label) = name;
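/* Editor's sketch of the code emitted above for the large PIC model
   (register names are illustrative; the PIC register is usually a
   pseudo here, shown as %rbx, and tmp_regno is typically R11_REG):

   .L0:
	leaq	.L0(%rip), %rbx                   # set_rip_rex64
	movabsq	$_GLOBAL_OFFSET_TABLE_-.L0, %r11  # set_got_offset_rex64
	addq	%r11, %rbx                        # pic += GOT offset  */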
7019 /* Create and initialize PIC register if required. */
7021 ix86_init_pic_reg (void)
7026 if (!ix86_use_pseudo_pic_reg ())
7033 if (ix86_cmodel == CM_LARGE_PIC)
7034 ix86_init_large_pic_reg (R11_REG);
7036 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7040 /* If there is a future mcount call in the function, it is more profitable
7041 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
7042 rtx reg = crtl->profile
7043 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7044 : pic_offset_table_rtx;
7045 rtx_insn *insn = emit_insn (gen_set_got (reg));
7046 RTX_FRAME_RELATED_P (insn) = 1;
7048 emit_move_insn (pic_offset_table_rtx, reg);
7049 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
7055 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7056 insert_insn_on_edge (seq, entry_edge);
7057 commit_one_edge_insertion (entry_edge);
7060 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7061 for a call to a function whose data type is FNTYPE.
7062 For a library call, FNTYPE is 0. */
7065 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
7066 tree fntype, /* tree ptr for function decl */
7067 rtx libname, /* SYMBOL_REF of library name or 0 */
7071 struct cgraph_local_info *i = NULL;
7072 struct cgraph_node *target = NULL;
7074 memset (cum, 0, sizeof (*cum));
7078 target = cgraph_node::get (fndecl);
7081 target = target->function_symbol ();
7082 i = cgraph_node::local_info (target->decl);
7083 cum->call_abi = ix86_function_abi (target->decl);
7086 cum->call_abi = ix86_function_abi (fndecl);
7089 cum->call_abi = ix86_function_type_abi (fntype);
7091 cum->caller = caller;
7093 /* Set up the number of registers to use for passing arguments. */
7094 cum->nregs = ix86_regparm;
7097 cum->nregs = (cum->call_abi == SYSV_ABI
7098 ? X86_64_REGPARM_MAX
7099 : X86_64_MS_REGPARM_MAX);
7103 cum->sse_nregs = SSE_REGPARM_MAX;
7106 cum->sse_nregs = (cum->call_abi == SYSV_ABI
7107 ? X86_64_SSE_REGPARM_MAX
7108 : X86_64_MS_SSE_REGPARM_MAX);
7112 cum->mmx_nregs = MMX_REGPARM_MAX;
7113 cum->warn_avx512f = true;
7114 cum->warn_avx = true;
7115 cum->warn_sse = true;
7116 cum->warn_mmx = true;
7118 /* Because the type might mismatch between caller and callee, we need to
7119 use the actual type of the function for local calls.
7120 FIXME: cgraph_analyze can be told to actually record if function uses
7121 va_start so for local functions maybe_vaarg can be made aggressive.
7123 FIXME: once the type system is fixed, we won't need this code anymore. */
7124 if (i && i->local && i->can_change_signature)
7125 fntype = TREE_TYPE (target->decl);
7126 cum->stdarg = stdarg_p (fntype);
7127 cum->maybe_vaarg = (fntype
7128 ? (!prototype_p (fntype) || stdarg_p (fntype))
7133 cum->warn_empty = !warn_abi || cum->stdarg;
7134 if (!cum->warn_empty && fntype)
7136 function_args_iterator iter;
7138 bool seen_empty_type = false;
7139 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
7141 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
7143 if (TYPE_EMPTY_P (argtype))
7144 seen_empty_type = true;
7145 else if (seen_empty_type)
7147 cum->warn_empty = true;
7155 /* If there are variable arguments, then we won't pass anything
7156 in registers in 32-bit mode. */
7157 if (stdarg_p (fntype))
7160 /* Since in 32-bit mode variable arguments are always passed on the
7161 stack, there is a scratch register available for indirect sibcalls. */
7163 cfun->machine->arg_reg_available = true;
7166 cum->warn_avx512f = false;
7167 cum->warn_avx = false;
7168 cum->warn_sse = false;
7169 cum->warn_mmx = false;
7173 /* Use ecx and edx registers if function has fastcall attribute,
7174 else look for regparm information. */
7177 unsigned int ccvt = ix86_get_callcvt (fntype);
7178 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7181 cum->fastcall = 1; /* Same first register as in fastcall. */
7183 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7189 cum->nregs = ix86_function_regparm (fntype, fndecl);
7192 /* Set up the number of SSE registers used for passing SFmode
7193 and DFmode arguments. Warn for mismatching ABI. */
7194 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
7197 cfun->machine->arg_reg_available = (cum->nregs > 0);
7200 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7201 But in the case of vector types, it is some vector mode.
7203 When we have only some of our vector isa extensions enabled, then there
7204 are some modes for which vector_mode_supported_p is false. For these
7205 modes, the generic vector support in gcc will choose some non-vector mode
7206 in order to implement the type. By computing the natural mode, we'll
7207 select the proper ABI location for the operand and not depend on whatever
7208 the middle-end decides to do with these vector types.
7210 The middle-end can't deal with vector types larger than 16 bytes. In this
7211 case, we return the original mode and warn of the ABI change if CUM isn't NULL.
7214 If INT_RETURN is true, warn ABI change if the vector mode isn't
7215 available for function return value. */
7218 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7221 machine_mode mode = TYPE_MODE (type);
7223 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7225 HOST_WIDE_INT size = int_size_in_bytes (type);
7226 if ((size == 8 || size == 16 || size == 32 || size == 64)
7227 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7228 && TYPE_VECTOR_SUBPARTS (type) > 1)
7230 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7232 /* There are no XFmode vector modes. */
7233 if (innermode == XFmode)
7236 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7237 mode = MIN_MODE_VECTOR_FLOAT;
7239 mode = MIN_MODE_VECTOR_INT;
7241 /* Get the mode which has this inner mode and number of units. */
7242 FOR_EACH_MODE_FROM (mode, mode)
7243 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7244 && GET_MODE_INNER (mode) == innermode)
7246 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7248 static bool warnedavx512f;
7249 static bool warnedavx512f_ret;
7251 if (cum && cum->warn_avx512f && !warnedavx512f)
7253 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7254 "without AVX512F enabled changes the ABI"))
7255 warnedavx512f = true;
7257 else if (in_return && !warnedavx512f_ret)
7259 if (warning (OPT_Wpsabi, "AVX512F vector return "
7260 "without AVX512F enabled changes the ABI"))
7261 warnedavx512f_ret = true;
7264 return TYPE_MODE (type);
7266 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7268 static bool warnedavx;
7269 static bool warnedavx_ret;
7271 if (cum && cum->warn_avx && !warnedavx)
7273 if (warning (OPT_Wpsabi, "AVX vector argument "
7274 "without AVX enabled changes the ABI"))
7277 else if (in_return && !warnedavx_ret)
7279 if (warning (OPT_Wpsabi, "AVX vector return "
7280 "without AVX enabled changes the ABI"))
7281 warnedavx_ret = true;
7284 return TYPE_MODE (type);
7286 else if (((size == 8 && TARGET_64BIT) || size == 16)
7290 static bool warnedsse;
7291 static bool warnedsse_ret;
7293 if (cum && cum->warn_sse && !warnedsse)
7295 if (warning (OPT_Wpsabi, "SSE vector argument "
7296 "without SSE enabled changes the ABI"))
7299 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7301 if (warning (OPT_Wpsabi, "SSE vector return "
7302 "without SSE enabled changes the ABI"))
7303 warnedsse_ret = true;
7306 else if ((size == 8 && !TARGET_64BIT)
7308 || cfun->machine->func_type == TYPE_NORMAL)
7312 static bool warnedmmx;
7313 static bool warnedmmx_ret;
7315 if (cum && cum->warn_mmx && !warnedmmx)
7317 if (warning (OPT_Wpsabi, "MMX vector argument "
7318 "without MMX enabled changes the ABI"))
7321 else if (in_return && !warnedmmx_ret)
7323 if (warning (OPT_Wpsabi, "MMX vector return "
7324 "without MMX enabled changes the ABI"))
7325 warnedmmx_ret = true;
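/* Editor's illustration of the warnings above (hypothetical user type):

       typedef int v8si __attribute__((vector_size (32)));

   passed without -mavx: the mode search above still finds V8SImode,
   but since AVX is disabled the size == 32 branch warns once with
   -Wpsabi that the ABI changes and returns the original TYPE_MODE
   instead.  */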
7338 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7339 this may not agree with the mode that the type system has chosen for the
7340 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7341 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
7344 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
7349 if (orig_mode != BLKmode)
7350 tmp = gen_rtx_REG (orig_mode, regno);
7353 tmp = gen_rtx_REG (mode, regno);
7354 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7355 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7361 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
7362 of this code is to classify each eightbyte of an incoming argument by register
7363 class and assign registers accordingly. */
7365 /* Return the union class of CLASS1 and CLASS2.
7366 See the x86-64 PS ABI for details. */
7368 static enum x86_64_reg_class
7369 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7371 /* Rule #1: If both classes are equal, this is the resulting class. */
7372 if (class1 == class2)
7375 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7377 if (class1 == X86_64_NO_CLASS)
7379 if (class2 == X86_64_NO_CLASS)
7382 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7383 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7384 return X86_64_MEMORY_CLASS;
7386 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
7387 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7388 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7389 return X86_64_INTEGERSI_CLASS;
7390 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7391 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7392 return X86_64_INTEGER_CLASS;
7394 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7396 if (class1 == X86_64_X87_CLASS
7397 || class1 == X86_64_X87UP_CLASS
7398 || class1 == X86_64_COMPLEX_X87_CLASS
7399 || class2 == X86_64_X87_CLASS
7400 || class2 == X86_64_X87UP_CLASS
7401 || class2 == X86_64_COMPLEX_X87_CLASS)
7402 return X86_64_MEMORY_CLASS;
7404 /* Rule #6: Otherwise class SSE is used. */
7405 return X86_64_SSE_CLASS;
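/* Editor's worked example for merge_classes (hypothetical aggregate):

       struct s { float f; int i; };   /- one eightbyte -/

   The float field classifies as SSESF and the int as INTEGERSI;
   Rule #4 merges them to INTEGERSI, so the whole struct travels in a
   general-purpose register.  */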
7408 /* Classify the argument of type TYPE and mode MODE.
7409 CLASSES will be filled by the register class used to pass each word
7410 of the operand. The number of words is returned. In case the parameter
7411 should be passed in memory, 0 is returned. As a special case for zero
7412 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7414 BIT_OFFSET is used internally for handling records; it specifies the
7415 offset in bits, taken modulo 512 to avoid overflow cases.
7417 See the x86-64 PS ABI for details.
7421 classify_argument (machine_mode mode, const_tree type,
7422 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
7425 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7426 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7428 /* Variable sized entities are always passed/returned in memory. */
7432 if (mode != VOIDmode
7433 && targetm.calls.must_pass_in_stack (mode, type))
7436 if (type && AGGREGATE_TYPE_P (type))
7440 enum x86_64_reg_class subclasses[MAX_CLASSES];
7442 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
7446 for (i = 0; i < words; i++)
7447 classes[i] = X86_64_NO_CLASS;
7449 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
7450 signal the memory class, so handle it as a special case. */
7453 classes[0] = X86_64_NO_CLASS;
7457 /* Classify each field of the record and merge the classes. */
7458 switch (TREE_CODE (type))
7461 /* And now merge the fields of the structure. */
7462 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7464 if (TREE_CODE (field) == FIELD_DECL)
7468 if (TREE_TYPE (field) == error_mark_node)
7471 /* Bitfields are always classified as integer. Handle them
7472 early, since later code would consider them to be
7473 misaligned integers. */
7474 if (DECL_BIT_FIELD (field))
7476 for (i = (int_bit_position (field)
7477 + (bit_offset % 64)) / 8 / 8;
7478 i < ((int_bit_position (field) + (bit_offset % 64))
7479 + tree_to_shwi (DECL_SIZE (field))
7482 = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
7488 type = TREE_TYPE (field);
7490 /* Flexible array member is ignored. */
7491 if (TYPE_MODE (type) == BLKmode
7492 && TREE_CODE (type) == ARRAY_TYPE
7493 && TYPE_SIZE (type) == NULL_TREE
7494 && TYPE_DOMAIN (type) != NULL_TREE
7495 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
7500 if (!warned && warn_psabi)
7503 inform (input_location,
7504 "the ABI of passing struct with"
7505 " a flexible array member has"
7506 " changed in GCC 4.4");
7510 num = classify_argument (TYPE_MODE (type), type,
7512 (int_bit_position (field)
7513 + bit_offset) % 512);
7516 pos = (int_bit_position (field)
7517 + (bit_offset % 64)) / 8 / 8;
7518 for (i = 0; i < num && (i + pos) < words; i++)
7520 = merge_classes (subclasses[i], classes[i + pos]);
7527 /* Arrays are handled as small records. */
7530 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
7531 TREE_TYPE (type), subclasses, bit_offset);
7535 /* The partial classes are now full classes. */
7536 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
7537 subclasses[0] = X86_64_SSE_CLASS;
7538 if (subclasses[0] == X86_64_INTEGERSI_CLASS
7539 && !((bit_offset % 64) == 0 && bytes == 4))
7540 subclasses[0] = X86_64_INTEGER_CLASS;
7542 for (i = 0; i < words; i++)
7543 classes[i] = subclasses[i % num];
7548 case QUAL_UNION_TYPE:
7549 /* Unions are similar to RECORD_TYPE but offset is always 0.
7551 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7553 if (TREE_CODE (field) == FIELD_DECL)
7557 if (TREE_TYPE (field) == error_mark_node)
7560 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
7561 TREE_TYPE (field), subclasses,
7565 for (i = 0; i < num && i < words; i++)
7566 classes[i] = merge_classes (subclasses[i], classes[i]);
7577 /* When size > 16 bytes, if the first eightbyte isn't
7578 X86_64_SSE_CLASS or any of the others aren't
7579 X86_64_SSEUP_CLASS, everything should be passed in memory. */
7581 if (classes[0] != X86_64_SSE_CLASS)
7584 for (i = 1; i < words; i++)
7585 if (classes[i] != X86_64_SSEUP_CLASS)
7589 /* Final merger cleanup. */
7590 for (i = 0; i < words; i++)
7592 /* If one class is MEMORY, everything should be passed in memory. */
7594 if (classes[i] == X86_64_MEMORY_CLASS)
7597 /* The X86_64_SSEUP_CLASS should always be preceded by
7598 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
7599 if (classes[i] == X86_64_SSEUP_CLASS
7600 && classes[i - 1] != X86_64_SSE_CLASS
7601 && classes[i - 1] != X86_64_SSEUP_CLASS)
7603 /* The first one should never be X86_64_SSEUP_CLASS. */
7604 gcc_assert (i != 0);
7605 classes[i] = X86_64_SSE_CLASS;
7608 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
7609 everything should be passed in memory. */
7610 if (classes[i] == X86_64_X87UP_CLASS
7611 && (classes[i - 1] != X86_64_X87_CLASS))
7615 /* The first one should never be X86_64_X87UP_CLASS. */
7616 gcc_assert (i != 0);
7617 if (!warned && warn_psabi)
7620 inform (input_location,
7621 "the ABI of passing union with long double"
7622 " has changed in GCC 4.4");
7630 /* Compute the alignment needed. We align all types to natural boundaries with
7631 the exception of XFmode, which is aligned to 64 bits. */
7632 if (mode != VOIDmode && mode != BLKmode)
7634 int mode_alignment = GET_MODE_BITSIZE (mode);
7637 mode_alignment = 128;
7638 else if (mode == XCmode)
7639 mode_alignment = 256;
7640 if (COMPLEX_MODE_P (mode))
7641 mode_alignment /= 2;
7642 /* Misaligned fields are always returned in memory. */
7643 if (bit_offset % mode_alignment)
7647 /* For V1xx modes, just use the base mode. */
7648 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7649 && GET_MODE_UNIT_SIZE (mode) == bytes)
7650 mode = GET_MODE_INNER (mode);
7652 /* Classification of atomic types. */
7657 classes[0] = X86_64_SSE_CLASS;
7660 classes[0] = X86_64_SSE_CLASS;
7661 classes[1] = X86_64_SSEUP_CLASS;
7671 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7673 /* Analyze last 128 bits only. */
7674 size = (size - 1) & 0x7f;
7678 classes[0] = X86_64_INTEGERSI_CLASS;
7683 classes[0] = X86_64_INTEGER_CLASS;
7686 else if (size < 64+32)
7688 classes[0] = X86_64_INTEGER_CLASS;
7689 classes[1] = X86_64_INTEGERSI_CLASS;
7692 else if (size < 64+64)
7694 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7702 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7706 /* OImode shouldn't be used directly. */
7711 if (!(bit_offset % 64))
7712 classes[0] = X86_64_SSESF_CLASS;
7714 classes[0] = X86_64_SSE_CLASS;
7717 classes[0] = X86_64_SSEDF_CLASS;
7720 classes[0] = X86_64_X87_CLASS;
7721 classes[1] = X86_64_X87UP_CLASS;
7724 classes[0] = X86_64_SSE_CLASS;
7725 classes[1] = X86_64_SSEUP_CLASS;
7728 classes[0] = X86_64_SSE_CLASS;
7729 if (!(bit_offset % 64))
7735 if (!warned && warn_psabi)
7738 inform (input_location,
7739 "the ABI of passing structure with complex float"
7740 " member has changed in GCC 4.4");
7742 classes[1] = X86_64_SSESF_CLASS;
7746 classes[0] = X86_64_SSEDF_CLASS;
7747 classes[1] = X86_64_SSEDF_CLASS;
7750 classes[0] = X86_64_COMPLEX_X87_CLASS;
7753 /* These modes are larger than 16 bytes. */
7761 classes[0] = X86_64_SSE_CLASS;
7762 classes[1] = X86_64_SSEUP_CLASS;
7763 classes[2] = X86_64_SSEUP_CLASS;
7764 classes[3] = X86_64_SSEUP_CLASS;
7772 classes[0] = X86_64_SSE_CLASS;
7773 classes[1] = X86_64_SSEUP_CLASS;
7774 classes[2] = X86_64_SSEUP_CLASS;
7775 classes[3] = X86_64_SSEUP_CLASS;
7776 classes[4] = X86_64_SSEUP_CLASS;
7777 classes[5] = X86_64_SSEUP_CLASS;
7778 classes[6] = X86_64_SSEUP_CLASS;
7779 classes[7] = X86_64_SSEUP_CLASS;
7787 classes[0] = X86_64_SSE_CLASS;
7788 classes[1] = X86_64_SSEUP_CLASS;
7796 classes[0] = X86_64_SSE_CLASS;
7802 gcc_assert (VECTOR_MODE_P (mode));
7807 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7809 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7810 classes[0] = X86_64_INTEGERSI_CLASS;
7812 classes[0] = X86_64_INTEGER_CLASS;
7813 classes[1] = X86_64_INTEGER_CLASS;
7814 return 1 + (bytes > 8);
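/* Editor's worked example for classify_argument (hypothetical
   aggregate):

       struct s { double d; long l; };   /- 16 bytes, two eightbytes -/

   classifies the two eightbytes as SSEDF and INTEGER, so
   examine_argument below reports one SSE and one integer register:
   d goes in an XMM register and l in a general-purpose register.  */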
7818 /* Examine the argument and set the number of registers required in each
7819 class. Return true iff the parameter should be passed in memory. */
7822 examine_argument (machine_mode mode, const_tree type, int in_return,
7823 int *int_nregs, int *sse_nregs)
7825 enum x86_64_reg_class regclass[MAX_CLASSES];
7826 int n = classify_argument (mode, type, regclass, 0);
7833 for (n--; n >= 0; n--)
7834 switch (regclass[n])
7836 case X86_64_INTEGER_CLASS:
7837 case X86_64_INTEGERSI_CLASS:
7840 case X86_64_SSE_CLASS:
7841 case X86_64_SSESF_CLASS:
7842 case X86_64_SSEDF_CLASS:
7845 case X86_64_NO_CLASS:
7846 case X86_64_SSEUP_CLASS:
7848 case X86_64_X87_CLASS:
7849 case X86_64_X87UP_CLASS:
7850 case X86_64_COMPLEX_X87_CLASS:
7854 case X86_64_MEMORY_CLASS:
7861 /* Construct container for the argument used by GCC interface. See
7862 FUNCTION_ARG for the detailed description. */
7865 construct_container (machine_mode mode, machine_mode orig_mode,
7866 const_tree type, int in_return, int nintregs, int nsseregs,
7867 const int *intreg, int sse_regno)
7869 /* The following variables hold the static issued_error state. */
7870 static bool issued_sse_arg_error;
7871 static bool issued_sse_ret_error;
7872 static bool issued_x87_ret_error;
7874 machine_mode tmpmode;
7876 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7877 enum x86_64_reg_class regclass[MAX_CLASSES];
7881 int needed_sseregs, needed_intregs;
7882 rtx exp[MAX_CLASSES];
7885 n = classify_argument (mode, type, regclass, 0);
7888 if (examine_argument (mode, type, in_return, &needed_intregs,
7891 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7894 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7895 some less clueful developer tries to use floating-point anyway. */
7896 if (needed_sseregs && !TARGET_SSE)
7900 if (!issued_sse_ret_error)
7902 error ("SSE register return with SSE disabled");
7903 issued_sse_ret_error = true;
7906 else if (!issued_sse_arg_error)
7908 error ("SSE register argument with SSE disabled");
7909 issued_sse_arg_error = true;
7914 /* Likewise, error if the ABI requires us to return values in the
7915 x87 registers and the user specified -mno-80387. */
7916 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7917 for (i = 0; i < n; i++)
7918 if (regclass[i] == X86_64_X87_CLASS
7919 || regclass[i] == X86_64_X87UP_CLASS
7920 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7922 if (!issued_x87_ret_error)
7924 error ("x87 register return with x87 disabled");
7925 issued_x87_ret_error = true;
7930 /* First construct the simple cases. Avoid SCmode, since we want to use
7931 a single register to pass this type. */
7932 if (n == 1 && mode != SCmode)
7933 switch (regclass[0])
7935 case X86_64_INTEGER_CLASS:
7936 case X86_64_INTEGERSI_CLASS:
7937 return gen_rtx_REG (mode, intreg[0]);
7938 case X86_64_SSE_CLASS:
7939 case X86_64_SSESF_CLASS:
7940 case X86_64_SSEDF_CLASS:
7941 if (mode != BLKmode)
7942 return gen_reg_or_parallel (mode, orig_mode,
7943 GET_SSE_REGNO (sse_regno));
7945 case X86_64_X87_CLASS:
7946 case X86_64_COMPLEX_X87_CLASS:
7947 return gen_rtx_REG (mode, FIRST_STACK_REG);
7948 case X86_64_NO_CLASS:
7949 /* Zero sized array, struct or class. */
7955 && regclass[0] == X86_64_SSE_CLASS
7956 && regclass[1] == X86_64_SSEUP_CLASS
7958 return gen_reg_or_parallel (mode, orig_mode,
7959 GET_SSE_REGNO (sse_regno));
7961 && regclass[0] == X86_64_SSE_CLASS
7962 && regclass[1] == X86_64_SSEUP_CLASS
7963 && regclass[2] == X86_64_SSEUP_CLASS
7964 && regclass[3] == X86_64_SSEUP_CLASS
7966 return gen_reg_or_parallel (mode, orig_mode,
7967 GET_SSE_REGNO (sse_regno));
7969 && regclass[0] == X86_64_SSE_CLASS
7970 && regclass[1] == X86_64_SSEUP_CLASS
7971 && regclass[2] == X86_64_SSEUP_CLASS
7972 && regclass[3] == X86_64_SSEUP_CLASS
7973 && regclass[4] == X86_64_SSEUP_CLASS
7974 && regclass[5] == X86_64_SSEUP_CLASS
7975 && regclass[6] == X86_64_SSEUP_CLASS
7976 && regclass[7] == X86_64_SSEUP_CLASS
7978 return gen_reg_or_parallel (mode, orig_mode,
7979 GET_SSE_REGNO (sse_regno));
7981 && regclass[0] == X86_64_X87_CLASS
7982 && regclass[1] == X86_64_X87UP_CLASS)
7983 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7986 && regclass[0] == X86_64_INTEGER_CLASS
7987 && regclass[1] == X86_64_INTEGER_CLASS
7988 && (mode == CDImode || mode == TImode || mode == BLKmode)
7989 && intreg[0] + 1 == intreg[1])
7991 if (mode == BLKmode)
7993 /* Use TImode for BLKmode values in 2 integer registers. */
7994 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
7995 gen_rtx_REG (TImode, intreg[0]),
7997 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
7998 XVECEXP (ret, 0, 0) = exp[0];
8002 return gen_rtx_REG (mode, intreg[0]);
8005 /* Otherwise figure out the entries of the PARALLEL. */
8006 for (i = 0; i < n; i++)
8010 switch (regclass[i])
8012 case X86_64_NO_CLASS:
8014 case X86_64_INTEGER_CLASS:
8015 case X86_64_INTEGERSI_CLASS:
8016 /* Merge TImodes on aligned occasions here too. */
8017 if (i * 8 + 8 > bytes)
8019 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
8020 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
8021 /* We've requested a size we
8022 don't have a mode for. Use DImode. */
8025 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8030 = gen_rtx_EXPR_LIST (VOIDmode,
8031 gen_rtx_REG (tmpmode, *intreg),
8035 case X86_64_SSESF_CLASS:
8037 = gen_rtx_EXPR_LIST (VOIDmode,
8038 gen_rtx_REG (SFmode,
8039 GET_SSE_REGNO (sse_regno)),
8043 case X86_64_SSEDF_CLASS:
8045 = gen_rtx_EXPR_LIST (VOIDmode,
8046 gen_rtx_REG (DFmode,
8047 GET_SSE_REGNO (sse_regno)),
8051 case X86_64_SSE_CLASS:
8059 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8069 && regclass[1] == X86_64_SSEUP_CLASS
8070 && regclass[2] == X86_64_SSEUP_CLASS
8071 && regclass[3] == X86_64_SSEUP_CLASS);
8077 && regclass[1] == X86_64_SSEUP_CLASS
8078 && regclass[2] == X86_64_SSEUP_CLASS
8079 && regclass[3] == X86_64_SSEUP_CLASS
8080 && regclass[4] == X86_64_SSEUP_CLASS
8081 && regclass[5] == X86_64_SSEUP_CLASS
8082 && regclass[6] == X86_64_SSEUP_CLASS
8083 && regclass[7] == X86_64_SSEUP_CLASS);
8091 = gen_rtx_EXPR_LIST (VOIDmode,
8092 gen_rtx_REG (tmpmode,
8093 GET_SSE_REGNO (sse_regno)),
8102 /* Empty aligned struct, union or class. */
8106 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8107 for (i = 0; i < nexps; i++)
8108 XVECEXP (ret, 0, i) = exp [i];
8112 /* Update the data in CUM to advance over an argument of mode MODE
8113 and data type TYPE. (TYPE is null for libcalls where that information
8114 may not be available.)
8116 Return the number of integer registers advanced over. */
8119 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8120 const_tree type, HOST_WIDE_INT bytes,
8121 HOST_WIDE_INT words)
8124 bool error_p = false;
8128 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8129 bytes in registers. */
8130 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8150 cum->words += words;
8151 cum->nregs -= words;
8152 cum->regno += words;
8153 if (cum->nregs >= 0)
8155 if (cum->nregs <= 0)
8158 cfun->machine->arg_reg_available = false;
8164 /* OImode shouldn't be used directly. */
8168 if (cum->float_in_sse == -1)
8170 if (cum->float_in_sse < 2)
8174 if (cum->float_in_sse == -1)
8176 if (cum->float_in_sse < 1)
8199 if (!type || !AGGREGATE_TYPE_P (type))
8201 cum->sse_words += words;
8202 cum->sse_nregs -= 1;
8203 cum->sse_regno += 1;
8204 if (cum->sse_nregs <= 0)
8218 if (!type || !AGGREGATE_TYPE_P (type))
8220 cum->mmx_words += words;
8221 cum->mmx_nregs -= 1;
8222 cum->mmx_regno += 1;
8223 if (cum->mmx_nregs <= 0)
8233 cum->float_in_sse = 0;
8234 error ("calling %qD with SSE calling convention without "
8235 "SSE/SSE2 enabled", cum->decl);
8236 sorry ("this is a GCC bug that can be worked around by adding "
8237 "attribute used to function called");
8244 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8245 const_tree type, HOST_WIDE_INT words, bool named)
8247 int int_nregs, sse_nregs;
8249 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
8250 if (!named && (VALID_AVX512F_REG_MODE (mode)
8251 || VALID_AVX256_REG_MODE (mode)))
8254 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8255 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8257 cum->nregs -= int_nregs;
8258 cum->sse_nregs -= sse_nregs;
8259 cum->regno += int_nregs;
8260 cum->sse_regno += sse_nregs;
8265 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8266 cum->words = ROUND_UP (cum->words, align);
8267 cum->words += words;
8273 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8274 HOST_WIDE_INT words)
8276 /* Otherwise, this should be passed indirectly. */
8277 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8279 cum->words += words;
8289 /* Update the data in CUM to advance over an argument of mode MODE and
8290 data type TYPE. (TYPE is null for libcalls where that information
8291 may not be available.) */
8294 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8295 const_tree type, bool named)
8297 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8298 HOST_WIDE_INT bytes, words;
8301 /* The argument of interrupt handler is a special case and is
8302 handled in ix86_function_arg. */
8303 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8306 if (mode == BLKmode)
8307 bytes = int_size_in_bytes (type);
8309 bytes = GET_MODE_SIZE (mode);
8310 words = CEIL (bytes, UNITS_PER_WORD);
8313 mode = type_natural_mode (type, NULL, false);
8317 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8319 if (call_abi == MS_ABI)
8320 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8322 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8325 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8327 /* For pointers passed in memory we expect bounds passed in the Bounds Table. */
8331 /* Track if there are outgoing arguments on the stack. */
8333 cfun->machine->outgoing_args_on_stack = true;
8337 /* Define where to put the arguments to a function.
8338 Value is zero to push the argument on the stack,
8339 or a hard register in which to store the argument.
8341 MODE is the argument's machine mode.
8342 TYPE is the data type of the argument (as a tree).
8343 This is null for libcalls where that information may
8345 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8346 the preceding args and about the function being called.
8347 NAMED is nonzero if this argument is a named parameter
8348 (otherwise it is an extra parameter matching an ellipsis). */
8351 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8352 machine_mode orig_mode, const_tree type,
8353 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8355 bool error_p = false;
8357 /* Avoid the AL settings for the Unix64 ABI. */
8358 if (mode == VOIDmode)
8363 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8364 bytes in registers. */
8365 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8384 if (words <= cum->nregs)
8386 int regno = cum->regno;
8388 /* Fastcall allocates the first two DWORD (SImode) or
8389 smaller arguments to ECX and EDX if it isn't an aggregate type. */
8395 || (type && AGGREGATE_TYPE_P (type)))
8398 /* ECX, not EAX, is the first allocated register. */
8399 if (regno == AX_REG)
8402 return gen_rtx_REG (mode, regno);
8407 if (cum->float_in_sse == -1)
8409 if (cum->float_in_sse < 2)
8413 if (cum->float_in_sse == -1)
8415 if (cum->float_in_sse < 1)
8419 /* In 32-bit mode, we pass TImode in XMM registers. */
8426 if (!type || !AGGREGATE_TYPE_P (type))
8429 return gen_reg_or_parallel (mode, orig_mode,
8430 cum->sse_regno + FIRST_SSE_REG);
8436 /* OImode and XImode shouldn't be used directly. */
8451 if (!type || !AGGREGATE_TYPE_P (type))
8454 return gen_reg_or_parallel (mode, orig_mode,
8455 cum->sse_regno + FIRST_SSE_REG);
8465 if (!type || !AGGREGATE_TYPE_P (type))
8468 return gen_reg_or_parallel (mode, orig_mode,
8469 cum->mmx_regno + FIRST_MMX_REG);
8475 cum->float_in_sse = 0;
8476 error ("calling %qD with SSE calling convention without "
8477 "SSE/SSE2 enabled", cum->decl);
8478 sorry ("this is a GCC bug that can be worked around by adding "
8479 "attribute used to function called");
8486 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8487 machine_mode orig_mode, const_tree type, bool named)
8489 /* Handle a hidden AL argument containing the number of SSE registers
8490 for varargs x86-64 functions. */
8491 if (mode == VOIDmode)
8492 return GEN_INT (cum->maybe_vaarg
8493 ? (cum->sse_nregs < 0
8494 ? X86_64_SSE_REGPARM_MAX
8515 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8521 return construct_container (mode, orig_mode, type, 0, cum->nregs,
8523 &x86_64_int_parameter_registers [cum->regno],
8528 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8529 machine_mode orig_mode, bool named,
8530 HOST_WIDE_INT bytes)
8534 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
8535 We use the value -2 to specify that the current function call is MSABI. */
8536 if (mode == VOIDmode)
8537 return GEN_INT (-2);
8539 /* If we've run out of registers, it goes on the stack. */
8540 if (cum->nregs == 0)
8543 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
8545 /* Only floating point modes are passed in anything but integer regs. */
8546 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
8549 regno = cum->regno + FIRST_SSE_REG;
8554 /* Unnamed floating parameters are passed in both the
8555 SSE and integer registers. */
8556 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
8557 t2 = gen_rtx_REG (mode, regno);
8558 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
8559 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
8560 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
8563 /* Handle aggregate types passed in registers. */
8564 if (orig_mode == BLKmode)
8566 if (bytes > 0 && bytes <= 8)
8567 mode = (bytes > 4 ? DImode : SImode);
8568 if (mode == BLKmode)
8572 return gen_reg_or_parallel (mode, orig_mode, regno);
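/* Illustrative sketch, not part of the compiler: under the MS x64
   convention handled above, a call such as

     void f (int a, double b, void *c);

   passes A in %ecx, B in %xmm1 and C in %r8.  Each argument consumes one
   of the four fixed argument slots regardless of its class, which is why
   CUM->regno indexes the integer and SSE register arrays in lockstep.  */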
8575 /* Return where to put the arguments to a function.
8576 Return zero to push the argument on the stack, or a hard register in which to store the argument.
8578 MODE is the argument's machine mode. TYPE is the data type of the
8579 argument. It is null for libcalls where that information may not be
8580 available. CUM gives information about the preceding args and about
8581 the function being called. NAMED is nonzero if this argument is a
8582 named parameter (otherwise it is an extra parameter matching an
8586 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
8587 const_tree type, bool named)
8589 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8590 machine_mode mode = omode;
8591 HOST_WIDE_INT bytes, words;
8594 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8596 gcc_assert (type != NULL_TREE);
8597 if (POINTER_TYPE_P (type))
8599 /* This is the pointer argument. */
8600 gcc_assert (TYPE_MODE (type) == Pmode);
8601 /* It is at -WORD(AP) in the current frame in interrupt and
8602 exception handlers. */
8603 arg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
8607 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
8608 && TREE_CODE (type) == INTEGER_TYPE
8609 && TYPE_MODE (type) == word_mode);
8610 /* The error code is the word-mode integer argument at
8611 -2 * WORD(AP) in the current frame of the exception handler. */
8613 arg = gen_rtx_MEM (word_mode,
8614 plus_constant (Pmode,
8616 -2 * UNITS_PER_WORD));
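/* Illustrative sketch, not part of the compiler: for a handler declared
   (hypothetical names) as

     void __attribute__ ((interrupt))
     handler (void *frame, uword_t error);

   FRAME is fetched from -WORD(AP) and ERROR from -2*WORD(AP), matching
   the layout the CPU pushes on the stack when the exception is taken.  */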
8621 if (mode == BLKmode)
8622 bytes = int_size_in_bytes (type);
8624 bytes = GET_MODE_SIZE (mode);
8625 words = CEIL (bytes, UNITS_PER_WORD);
8627 /* To simplify the code below, represent vector types with a vector mode
8628 even if MMX/SSE are not active. */
8629 if (type && TREE_CODE (type) == VECTOR_TYPE)
8630 mode = type_natural_mode (type, cum, false);
8634 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8636 if (call_abi == MS_ABI)
8637 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
8639 arg = function_arg_64 (cum, mode, omode, type, named);
8642 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
8644 /* Track if there are outgoing arguments on stack. */
8645 if (arg == NULL_RTX && cum->caller)
8646 cfun->machine->outgoing_args_on_stack = true;
8651 /* A C expression that indicates when an argument must be passed by
8652 reference. If nonzero for an argument, a copy of that argument is
8653 made in memory and a pointer to the argument is passed instead of
8654 the argument itself. The pointer is passed in whatever way is
8655 appropriate for passing a pointer to that type. */
8658 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8659 const_tree type, bool)
8661 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8665 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8667 /* See Windows x64 Software Convention. */
8668 if (call_abi == MS_ABI)
8670 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
8674 /* Arrays are passed by reference. */
8675 if (TREE_CODE (type) == ARRAY_TYPE)
8678 if (RECORD_OR_UNION_TYPE_P (type))
8680 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8681 are passed by reference. */
8682 msize = int_size_in_bytes (type);
8686 /* __m128 is passed by reference. */
8687 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8689 else if (type && int_size_in_bytes (type) == -1)
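/* Illustrative sketch, not part of the compiler: under the MS x64 rules
   above, struct { char c[3]; } (size 3) is passed by reference, while
   struct { void *p; } (size 8) travels directly in a register, and an
   array parameter is always passed by reference.  */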
8696 /* Return true when TYPE should be 128bit aligned for 32bit argument
8697 passing ABI. XXX: This function is obsolete and is only used for
8698 checking psABI compatibility with previous versions of GCC. */
8701 ix86_compat_aligned_value_p (const_tree type)
8703 machine_mode mode = TYPE_MODE (type);
8704 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8708 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8710 if (TYPE_ALIGN (type) < 128)
8713 if (AGGREGATE_TYPE_P (type))
8715 /* Walk the aggregates recursively. */
8716 switch (TREE_CODE (type))
8720 case QUAL_UNION_TYPE:
8724 /* Walk all the structure fields. */
8725 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8727 if (TREE_CODE (field) == FIELD_DECL
8728 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8735 /* Just in case some language passes arrays by value. */
8736 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8747 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8748 XXX: This function is obsolete and is only used for checking psABI
8749 compatibility with previous versions of GCC. */
8752 ix86_compat_function_arg_boundary (machine_mode mode,
8753 const_tree type, unsigned int align)
8755 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8756 natural boundaries. */
8757 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8759 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8760 make an exception for SSE modes since these require 128bit alignment.
8763 The handling here differs from field_alignment. ICC aligns MMX
8764 arguments to 4 byte boundaries, while structure fields are aligned
8765 to 8 byte boundaries. */
8768 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8769 align = PARM_BOUNDARY;
8773 if (!ix86_compat_aligned_value_p (type))
8774 align = PARM_BOUNDARY;
8777 if (align > BIGGEST_ALIGNMENT)
8778 align = BIGGEST_ALIGNMENT;
8782 /* Return true when TYPE should be 128bit aligned for 32bit argument passing ABI. */
8786 ix86_contains_aligned_value_p (const_tree type)
8788 machine_mode mode = TYPE_MODE (type);
8790 if (mode == XFmode || mode == XCmode)
8793 if (TYPE_ALIGN (type) < 128)
8796 if (AGGREGATE_TYPE_P (type))
8798 /* Walk the aggregates recursively. */
8799 switch (TREE_CODE (type))
8803 case QUAL_UNION_TYPE:
8807 /* Walk all the structure fields. */
8808 for (field = TYPE_FIELDS (type);
8810 field = DECL_CHAIN (field))
8812 if (TREE_CODE (field) == FIELD_DECL
8813 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8820 /* Just in case some language passes arrays by value. */
8821 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8830 return TYPE_ALIGN (type) >= 128;
8835 /* Gives the alignment boundary, in bits, of an argument with the
8836 specified mode and type. */
8839 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8844 /* Since the main variant type is used for the call, convert TYPE to
8845 its main variant. */
8846 type = TYPE_MAIN_VARIANT (type);
8847 align = TYPE_ALIGN (type);
8848 if (TYPE_EMPTY_P (type))
8849 return PARM_BOUNDARY;
8852 align = GET_MODE_ALIGNMENT (mode);
8853 if (align < PARM_BOUNDARY)
8854 align = PARM_BOUNDARY;
8858 unsigned int saved_align = align;
8862 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8865 if (mode == XFmode || mode == XCmode)
8866 align = PARM_BOUNDARY;
8868 else if (!ix86_contains_aligned_value_p (type))
8869 align = PARM_BOUNDARY;
8872 align = PARM_BOUNDARY;
8877 && align != ix86_compat_function_arg_boundary (mode, type,
8881 inform (input_location,
8882 "The ABI for passing parameters with %d-byte"
8883 " alignment has changed in GCC 4.6",
8884 align / BITS_PER_UNIT);
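/* Illustrative sketch, not part of the compiler: on ia32 a plain int
   argument falls on a 4-byte (PARM_BOUNDARY) stack boundary, while an
   __m128 argument requires a 16-byte boundary; the note above fires when
   this computation disagrees with the pre-GCC-4.6 one.  */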
8891 /* Return true if N is a possible register number of function value. */
8894 ix86_function_value_regno_p (const unsigned int regno)
8901 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8904 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
8906 /* Complex values are returned in %st(0)/%st(1) pair. */
8909 /* TODO: The function should depend on current function ABI but
8910 builtins.c would need updating then. Therefore we use the default ABI. */
8912 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8914 return TARGET_FLOAT_RETURNS_IN_80387;
8916 /* Complex values are returned in %xmm0/%xmm1 pair. */
8922 if (TARGET_MACHO || TARGET_64BIT)
8930 /* Define how to find the value returned by a function.
8931 VALTYPE is the data type of the value (as a tree).
8932 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8933 otherwise, FUNC is 0. */
8936 function_value_32 (machine_mode orig_mode, machine_mode mode,
8937 const_tree fntype, const_tree fn)
8941 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8942 we normally prevent this case when mmx is not available. However
8943 some ABIs may require the result to be returned like DImode. */
8944 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8945 regno = FIRST_MMX_REG;
8947 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8948 we prevent this case when sse is not available. However some ABIs
8949 may require the result to be returned like integer TImode. */
8950 else if (mode == TImode
8951 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8952 regno = FIRST_SSE_REG;
8954 /* 32-byte vector modes in %ymm0. */
8955 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8956 regno = FIRST_SSE_REG;
8958 /* 64-byte vector modes in %zmm0. */
8959 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8960 regno = FIRST_SSE_REG;
8962 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8963 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8964 regno = FIRST_FLOAT_REG;
8966 /* Most things go in %eax. */
8969 /* Override FP return register with %xmm0 for local functions when
8970 SSE math is enabled or for functions with sseregparm attribute. */
8971 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8973 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8974 if (sse_level == -1)
8976 error ("calling %qD with SSE calling convention without "
8977 "SSE/SSE2 enabled", fn);
8978 sorry ("this is a GCC bug that can be worked around by adding "
8979 "attribute used to function called");
8981 else if ((sse_level >= 1 && mode == SFmode)
8982 || (sse_level == 2 && mode == DFmode))
8983 regno = FIRST_SSE_REG;
8986 /* OImode shouldn't be used directly. */
8987 gcc_assert (mode != OImode);
8989 return gen_rtx_REG (orig_mode, regno);
8993 function_value_64 (machine_mode orig_mode, machine_mode mode,
8998 /* Handle libcalls, which don't provide a type node. */
8999 if (valtype == NULL)
9013 regno = FIRST_SSE_REG;
9017 regno = FIRST_FLOAT_REG;
9025 return gen_rtx_REG (mode, regno);
9027 else if (POINTER_TYPE_P (valtype))
9029 /* Pointers are always returned in word_mode. */
9033 ret = construct_container (mode, orig_mode, valtype, 1,
9034 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9035 x86_64_int_return_registers, 0);
9037 /* For zero-sized structures, construct_container returns NULL, but we
9038 need to keep the rest of the compiler happy by returning a meaningful value. */
9040 ret = gen_rtx_REG (orig_mode, AX_REG);
9046 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9049 unsigned int regno = AX_REG;
9053 switch (GET_MODE_SIZE (mode))
9056 if (valtype != NULL_TREE
9057 && !VECTOR_INTEGER_TYPE_P (valtype)
9059 && !INTEGRAL_TYPE_P (valtype)
9060 && !VECTOR_FLOAT_TYPE_P (valtype))
9062 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9063 && !COMPLEX_MODE_P (mode))
9064 regno = FIRST_SSE_REG;
9068 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
9070 if (mode == SFmode || mode == DFmode)
9071 regno = FIRST_SSE_REG;
9077 return gen_rtx_REG (orig_mode, regno);
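/* Illustrative sketch, not part of the compiler: with the MS x64 return
   rules above, an int comes back in %eax, a double in %xmm0, and a
   16-byte vector such as __m128 also in %xmm0, while aggregates of other
   sizes are returned through a hidden pointer (see ix86_return_in_memory
   below).  */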
9081 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9082 machine_mode orig_mode, machine_mode mode)
9084 const_tree fn, fntype;
9087 if (fntype_or_decl && DECL_P (fntype_or_decl))
9088 fn = fntype_or_decl;
9089 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
9091 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9092 return function_value_ms_64 (orig_mode, mode, valtype);
9093 else if (TARGET_64BIT)
9094 return function_value_64 (orig_mode, mode, valtype);
9096 return function_value_32 (orig_mode, mode, fntype, fn);
9100 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9102 machine_mode mode, orig_mode;
9104 orig_mode = TYPE_MODE (valtype);
9105 mode = type_natural_mode (valtype, NULL, true);
9106 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9109 /* Pointer function arguments and return values are promoted to
9110 word_mode for normal functions. */
9113 ix86_promote_function_mode (const_tree type, machine_mode mode,
9114 int *punsignedp, const_tree fntype,
9117 if (cfun->machine->func_type == TYPE_NORMAL
9118 && type != NULL_TREE
9119 && POINTER_TYPE_P (type))
9121 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9124 return default_promote_function_mode (type, mode, punsignedp, fntype,
9128 /* Return true if a structure, union or array with MODE containing FIELD
9129 should be accessed using BLKmode. */
9132 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9134 /* Union with XFmode must be in BLKmode. */
9135 return (mode == XFmode
9136 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9137 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
9141 ix86_libcall_value (machine_mode mode)
9143 return ix86_function_value_1 (NULL, NULL, mode, mode);
9146 /* Return true iff type is returned in memory. */
9149 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9151 #ifdef SUBTARGET_RETURN_IN_MEMORY
9152 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9154 const machine_mode mode = type_natural_mode (type, NULL, true);
9159 if (ix86_function_type_abi (fntype) == MS_ABI)
9161 size = int_size_in_bytes (type);
9163 /* __m128 is returned in xmm0. */
9164 if ((!type || VECTOR_INTEGER_TYPE_P (type)
9165 || INTEGRAL_TYPE_P (type)
9166 || VECTOR_FLOAT_TYPE_P (type))
9167 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9168 && !COMPLEX_MODE_P (mode)
9169 && (GET_MODE_SIZE (mode) == 16 || size == 16))
9172 /* Otherwise, the size must be exactly 1, 2, 4 or 8. */
9173 return size != 1 && size != 2 && size != 4 && size != 8;
9177 int needed_intregs, needed_sseregs;
9179 return examine_argument (mode, type, 1,
9180 &needed_intregs, &needed_sseregs);
9185 size = int_size_in_bytes (type);
9187 /* Intel MCU psABI returns scalars and aggregates no larger than 8
9188 bytes in registers. */
9190 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9192 if (mode == BLKmode)
9195 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9198 if (VECTOR_MODE_P (mode) || mode == TImode)
9200 /* User-created vectors small enough to fit in EAX. */
9204 /* Unless the ABI prescribes otherwise,
9205 MMX/3dNow values are returned in MM0 if available. */
9208 return TARGET_VECT8_RETURNS || !TARGET_MMX;
9210 /* SSE values are returned in XMM0 if available. */
9214 /* AVX values are returned in YMM0 if available. */
9218 /* AVX512F values are returned in ZMM0 if available. */
9220 return !TARGET_AVX512F;
9229 /* OImode shouldn't be used directly. */
9230 gcc_assert (mode != OImode);
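/* Illustrative sketch, not part of the compiler: with the 32-bit rules
   above, struct { int a, b, c; } (12 bytes) is returned in memory via a
   hidden pointer, an 8-byte aggregate comes back in %eax:%edx on targets
   where MS_AGGREGATE_RETURN holds, and __m128 stays in %xmm0 only when
   SSE is enabled.  */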
9238 /* Create the va_list data type. */
9241 ix86_build_builtin_va_list_64 (void)
9243 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9245 record = lang_hooks.types.make_type (RECORD_TYPE);
9246 type_decl = build_decl (BUILTINS_LOCATION,
9247 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9249 f_gpr = build_decl (BUILTINS_LOCATION,
9250 FIELD_DECL, get_identifier ("gp_offset"),
9251 unsigned_type_node);
9252 f_fpr = build_decl (BUILTINS_LOCATION,
9253 FIELD_DECL, get_identifier ("fp_offset"),
9254 unsigned_type_node);
9255 f_ovf = build_decl (BUILTINS_LOCATION,
9256 FIELD_DECL, get_identifier ("overflow_arg_area"),
9258 f_sav = build_decl (BUILTINS_LOCATION,
9259 FIELD_DECL, get_identifier ("reg_save_area"),
9262 va_list_gpr_counter_field = f_gpr;
9263 va_list_fpr_counter_field = f_fpr;
9265 DECL_FIELD_CONTEXT (f_gpr) = record;
9266 DECL_FIELD_CONTEXT (f_fpr) = record;
9267 DECL_FIELD_CONTEXT (f_ovf) = record;
9268 DECL_FIELD_CONTEXT (f_sav) = record;
9270 TYPE_STUB_DECL (record) = type_decl;
9271 TYPE_NAME (record) = type_decl;
9272 TYPE_FIELDS (record) = f_gpr;
9273 DECL_CHAIN (f_gpr) = f_fpr;
9274 DECL_CHAIN (f_fpr) = f_ovf;
9275 DECL_CHAIN (f_ovf) = f_sav;
9277 layout_type (record);
9279 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
9280 NULL_TREE, TYPE_ATTRIBUTES (record));
9282 /* The correct type is an array type of one element. */
9283 return build_array_type (record, build_index_type (size_zero_node));
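/* For reference, the record built above matches the SysV x86-64 psABI
   va_list; as a C-level sketch (illustrative only):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;      // byte offset into reg_save_area
       unsigned int fp_offset;      // likewise, for the SSE part
       void *overflow_arg_area;     // arguments passed on the stack
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];  // array of one element
*/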
9286 /* Setup the builtin va_list data type and for 64-bit the additional
9287 calling convention specific va_list data types. */
9290 ix86_build_builtin_va_list (void)
9294 /* Initialize ABI specific va_list builtin types.
9296 In lto1, we can encounter two va_list types:
9297 - one as a result of the type-merge across TUs, and
9298 - the one constructed here.
9299 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
9300 a type identity check in canonical_va_list_type based on
9301 TYPE_MAIN_VARIANT (which we used to have) will not work.
9302 Instead, we tag each va_list_type_node with its unique attribute, and
9303 look for the attribute in the type identity check in
9304 canonical_va_list_type.
9306 Tagging sysv_va_list_type_node directly with the attribute is
9307 problematic since it's an array of one record, which will degrade into a
9308 pointer to record when used as a parameter (see build_va_arg comments for
9309 an example), dropping the attribute in the process. So we tag the pointee instead. */
9312 /* For SYSV_ABI we use an array of one record. */
9313 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
9315 /* For MS_ABI we use plain pointer to argument area. */
9316 tree char_ptr_type = build_pointer_type (char_type_node);
9317 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
9318 TYPE_ATTRIBUTES (char_ptr_type));
9319 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
9321 return ((ix86_abi == MS_ABI)
9322 ? ms_va_list_type_node
9323 : sysv_va_list_type_node);
9327 /* For i386 we use plain pointer to argument area. */
9328 return build_pointer_type (char_type_node);
9332 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
9335 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9341 /* GPR size of varargs save area. */
9342 if (cfun->va_list_gpr_size)
9343 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9345 ix86_varargs_gpr_size = 0;
9347 /* FPR size of varargs save area. We don't need it if we don't pass
9348 anything in SSE registers. */
9349 if (TARGET_SSE && cfun->va_list_fpr_size)
9350 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9352 ix86_varargs_fpr_size = 0;
9354 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9357 save_area = frame_pointer_rtx;
9358 set = get_varargs_alias_set ();
9360 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9361 if (max > X86_64_REGPARM_MAX)
9362 max = X86_64_REGPARM_MAX;
9364 for (i = cum->regno; i < max; i++)
9366 mem = gen_rtx_MEM (word_mode,
9367 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9368 MEM_NOTRAP_P (mem) = 1;
9369 set_mem_alias_set (mem, set);
9370 emit_move_insn (mem,
9371 gen_rtx_REG (word_mode,
9372 x86_64_int_parameter_registers[i]));
9375 if (ix86_varargs_fpr_size)
9378 rtx_code_label *label;
9381 /* Now emit code to save SSE registers. The AX parameter contains the
9382 number of SSE parameter registers used to call this function, though
9383 all we actually check here is its zero/non-zero status. */
9385 label = gen_label_rtx ();
9386 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9387 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9390 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9391 we used movdqa (i.e. TImode) instead? Perhaps even better would
9392 be if we could determine the real mode of the data, via a hook
9393 into pass_stdarg. Ignore all that for now. */
9395 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9396 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9398 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
9399 if (max > X86_64_SSE_REGPARM_MAX)
9400 max = X86_64_SSE_REGPARM_MAX;
9402 for (i = cum->sse_regno; i < max; ++i)
9404 mem = plus_constant (Pmode, save_area,
9405 i * 16 + ix86_varargs_gpr_size);
9406 mem = gen_rtx_MEM (smode, mem);
9407 MEM_NOTRAP_P (mem) = 1;
9408 set_mem_alias_set (mem, set);
9409 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
9411 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
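/* Illustrative sketch, not part of the compiler: when both parts are
   saved, the register save area written above is laid out as

     bytes   0 ..  47   six GP argument registers, 8 bytes each
     bytes  48 .. 175   eight SSE argument registers, 16 bytes each

   which is what the gp_offset/fp_offset arithmetic in ix86_va_start
   below assumes (8 * X86_64_REGPARM_MAX == 48).  */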
9419 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
9421 alias_set_type set = get_varargs_alias_set ();
9424 /* Reset to zero, as there might be a sysv vaarg used before. */
9426 ix86_varargs_gpr_size = 0;
9427 ix86_varargs_fpr_size = 0;
9429 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
9433 mem = gen_rtx_MEM (Pmode,
9434 plus_constant (Pmode, virtual_incoming_args_rtx,
9435 i * UNITS_PER_WORD));
9436 MEM_NOTRAP_P (mem) = 1;
9437 set_mem_alias_set (mem, set);
9439 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
9440 emit_move_insn (mem, reg);
9445 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
9446 tree type, int *, int no_rtl)
9448 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9449 CUMULATIVE_ARGS next_cum;
9452 /* This argument doesn't appear to be used anymore, which is good,
9453 because the old code here didn't suppress rtl generation. */
9454 gcc_assert (!no_rtl);
9459 fntype = TREE_TYPE (current_function_decl);
9461 /* For varargs, we do not want to skip the dummy va_dcl argument.
9462 For stdargs, we do want to skip the last named argument. */
9464 if (stdarg_p (fntype))
9465 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9468 if (cum->call_abi == MS_ABI)
9469 setup_incoming_varargs_ms_64 (&next_cum);
9471 setup_incoming_varargs_64 (&next_cum);
9475 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
9478 int *pretend_size ATTRIBUTE_UNUSED,
9481 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9482 CUMULATIVE_ARGS next_cum;
9486 gcc_assert (!no_rtl);
9488 /* Do nothing if we use plain pointer to argument area. */
9489 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
9492 fntype = TREE_TYPE (current_function_decl);
9494 /* For varargs, we do not want to skip the dummy va_dcl argument.
9495 For stdargs, we do want to skip the last named argument. */
9497 if (stdarg_p (fntype))
9498 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9501 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9502 if (max > X86_64_REGPARM_MAX)
9503 max = X86_64_REGPARM_MAX;
9507 /* Checks if TYPE is of kind va_list char *. */
9510 is_va_list_char_pointer (tree type)
9514 /* For 32-bit it is always true. */
9517 canonic = ix86_canonical_va_list_type (type);
9518 return (canonic == ms_va_list_type_node
9519 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
9522 /* Implement va_start. */
9525 ix86_va_start (tree valist, rtx nextarg)
9527 HOST_WIDE_INT words, n_gpr, n_fpr;
9528 tree f_gpr, f_fpr, f_ovf, f_sav;
9529 tree gpr, fpr, ovf, sav, t;
9533 if (flag_split_stack
9534 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9536 unsigned int scratch_regno;
9538 /* When we are splitting the stack, we can't refer to the stack
9539 arguments using internal_arg_pointer, because they may be on
9540 the old stack. The split stack prologue will arrange to
9541 leave a pointer to the old stack arguments in a scratch
9542 register, which we here copy to a pseudo-register. The split
9543 stack prologue can't set the pseudo-register directly because
9544 it (the prologue) runs before any registers have been saved. */
9546 scratch_regno = split_stack_prologue_scratch_regno ();
9547 if (scratch_regno != INVALID_REGNUM)
9552 reg = gen_reg_rtx (Pmode);
9553 cfun->machine->split_stack_varargs_pointer = reg;
9556 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
9560 push_topmost_sequence ();
9561 emit_insn_after (seq, entry_of_function ());
9562 pop_topmost_sequence ();
9566 /* Only 64bit target needs something special. */
9567 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9569 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9570 std_expand_builtin_va_start (valist, nextarg);
9575 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
9576 next = expand_binop (ptr_mode, add_optab,
9577 cfun->machine->split_stack_varargs_pointer,
9578 crtl->args.arg_offset_rtx,
9579 NULL_RTX, 0, OPTAB_LIB_WIDEN);
9580 convert_move (va_r, next, 0);
9585 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9586 f_fpr = DECL_CHAIN (f_gpr);
9587 f_ovf = DECL_CHAIN (f_fpr);
9588 f_sav = DECL_CHAIN (f_ovf);
9590 valist = build_simple_mem_ref (valist);
9591 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9592 /* The following should be folded into the MEM_REF offset. */
9593 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9595 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9597 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9599 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9602 /* Count number of gp and fp argument registers used. */
9603 words = crtl->args.info.words;
9604 n_gpr = crtl->args.info.regno;
9605 n_fpr = crtl->args.info.sse_regno;
9607 if (cfun->va_list_gpr_size)
9609 type = TREE_TYPE (gpr);
9610 t = build2 (MODIFY_EXPR, type,
9611 gpr, build_int_cst (type, n_gpr * 8));
9612 TREE_SIDE_EFFECTS (t) = 1;
9613 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9616 if (TARGET_SSE && cfun->va_list_fpr_size)
9618 type = TREE_TYPE (fpr);
9619 t = build2 (MODIFY_EXPR, type, fpr,
9620 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9621 TREE_SIDE_EFFECTS (t) = 1;
9622 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9625 /* Find the overflow area. */
9626 type = TREE_TYPE (ovf);
9627 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9628 ovf_rtx = crtl->args.internal_arg_pointer;
9630 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9631 t = make_tree (type, ovf_rtx);
9633 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9635 t = build2 (MODIFY_EXPR, type, ovf, t);
9636 TREE_SIDE_EFFECTS (t) = 1;
9637 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9639 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9641 /* Find the register save area.
9642 The function prologue saves it right above the stack frame. */
9643 type = TREE_TYPE (sav);
9644 t = make_tree (type, frame_pointer_rtx);
9645 if (!ix86_varargs_gpr_size)
9646 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9648 t = build2 (MODIFY_EXPR, type, sav, t);
9649 TREE_SIDE_EFFECTS (t) = 1;
9650 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
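/* Illustrative sketch, not part of the compiler: for a function whose
   named arguments consumed two GP registers and one SSE register, the
   stores above leave

     gp_offset         = 2 * 8 = 16
     fp_offset         = 48 + 1 * 16 = 64
     overflow_arg_area = address of the first stack-passed argument
     reg_save_area     = base of the save area described earlier.  */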
9654 /* Implement va_arg. */
9657 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9660 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9661 tree f_gpr, f_fpr, f_ovf, f_sav;
9662 tree gpr, fpr, ovf, sav, t;
9664 tree lab_false, lab_over = NULL_TREE;
9669 machine_mode nat_mode;
9670 unsigned int arg_boundary;
9672 /* Only 64bit target needs something special. */
9673 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9674 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9676 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9677 f_fpr = DECL_CHAIN (f_gpr);
9678 f_ovf = DECL_CHAIN (f_fpr);
9679 f_sav = DECL_CHAIN (f_ovf);
9681 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9682 valist, f_gpr, NULL_TREE);
9684 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9685 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9686 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9688 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9690 type = build_pointer_type (type);
9691 size = arg_int_size_in_bytes (type);
9692 rsize = CEIL (size, UNITS_PER_WORD);
9694 nat_mode = type_natural_mode (type, NULL, false);
9709 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
9710 if (!TARGET_64BIT_MS_ABI)
9718 container = construct_container (nat_mode, TYPE_MODE (type),
9719 type, 0, X86_64_REGPARM_MAX,
9720 X86_64_SSE_REGPARM_MAX, intreg,
9725 /* Pull the value out of the saved registers. */
9727 addr = create_tmp_var (ptr_type_node, "addr");
9731 int needed_intregs, needed_sseregs;
9733 tree int_addr, sse_addr;
9735 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9736 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9738 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9740 need_temp = (!REG_P (container)
9741 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9742 || TYPE_ALIGN (type) > 128));
9744 /* If we are passing a structure, verify that it occupies a consecutive
9745 block of the register save area. If not, we need to do moves. */
9746 if (!need_temp && !REG_P (container))
9748 /* Verify that all registers are strictly consecutive. */
9749 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9753 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9755 rtx slot = XVECEXP (container, 0, i);
9756 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9757 || INTVAL (XEXP (slot, 1)) != i * 16)
9765 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9767 rtx slot = XVECEXP (container, 0, i);
9768 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9769 || INTVAL (XEXP (slot, 1)) != i * 8)
9781 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9782 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9785 /* First ensure that we fit completely in registers. */
9788 t = build_int_cst (TREE_TYPE (gpr),
9789 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9790 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9791 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9792 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9793 gimplify_and_add (t, pre_p);
9797 t = build_int_cst (TREE_TYPE (fpr),
9798 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9799 + X86_64_REGPARM_MAX * 8);
9800 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9801 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9802 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9803 gimplify_and_add (t, pre_p);
9806 /* Compute index to start of area used for integer regs. */
9809 /* int_addr = gpr + sav; */
9810 t = fold_build_pointer_plus (sav, gpr);
9811 gimplify_assign (int_addr, t, pre_p);
9815 /* sse_addr = fpr + sav; */
9816 t = fold_build_pointer_plus (sav, fpr);
9817 gimplify_assign (sse_addr, t, pre_p);
9821 int i, prev_size = 0;
9822 tree temp = create_tmp_var (type, "va_arg_tmp");
9825 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9826 gimplify_assign (addr, t, pre_p);
9828 for (i = 0; i < XVECLEN (container, 0); i++)
9830 rtx slot = XVECEXP (container, 0, i);
9831 rtx reg = XEXP (slot, 0);
9832 machine_mode mode = GET_MODE (reg);
9838 tree dest_addr, dest;
9839 int cur_size = GET_MODE_SIZE (mode);
9841 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9842 prev_size = INTVAL (XEXP (slot, 1));
9843 if (prev_size + cur_size > size)
9845 cur_size = size - prev_size;
9846 unsigned int nbits = cur_size * BITS_PER_UNIT;
9847 if (!int_mode_for_size (nbits, 1).exists (&mode))
9850 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9851 if (mode == GET_MODE (reg))
9852 addr_type = build_pointer_type (piece_type);
9854 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9856 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9859 if (SSE_REGNO_P (REGNO (reg)))
9861 src_addr = sse_addr;
9862 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9866 src_addr = int_addr;
9867 src_offset = REGNO (reg) * 8;
9869 src_addr = fold_convert (addr_type, src_addr);
9870 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9872 dest_addr = fold_convert (daddr_type, addr);
9873 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9874 if (cur_size == GET_MODE_SIZE (mode))
9876 src = build_va_arg_indirect_ref (src_addr);
9877 dest = build_va_arg_indirect_ref (dest_addr);
9879 gimplify_assign (dest, src, pre_p);
9884 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9885 3, dest_addr, src_addr,
9886 size_int (cur_size));
9887 gimplify_and_add (copy, pre_p);
9889 prev_size += cur_size;
9895 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9896 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9897 gimplify_assign (gpr, t, pre_p);
9902 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9903 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9904 gimplify_assign (unshare_expr (fpr), t, pre_p);
9907 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9909 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9912 /* ... otherwise out of the overflow area. */
9914 /* When the caller aligns a parameter on the stack, any alignment
9915 beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
9916 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
9917 caller. */
9918 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9919 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9920 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9922 /* Care for on-stack alignment if needed. */
9923 if (arg_boundary <= 64 || size == 0)
9927 HOST_WIDE_INT align = arg_boundary / 8;
9928 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9929 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9930 build_int_cst (TREE_TYPE (t), -align));
9933 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9934 gimplify_assign (addr, t, pre_p);
9936 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9937 gimplify_assign (unshare_expr (ovf), t, pre_p);
9940 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9942 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9943 addr = fold_convert (ptrtype, addr);
9946 addr = build_va_arg_indirect_ref (addr);
9947 return build_va_arg_indirect_ref (addr);
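/* Illustrative sketch, not part of the compiler: for a scalar integer
   argument the GIMPLE emitted above behaves like

     if (ap->gp_offset <= 48 - 8)
       {
         addr = ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }
     else
       {
         addr = ap->overflow_arg_area;
         ap->overflow_arg_area += 8;
       }
     result = *(T *) addr;

   with the analogous fp_offset / 16-byte step for SSE-classed values.  */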
9950 /* Return true if OPNUM's MEM should be matched
9951 in movabs* patterns. */
9954 ix86_check_movabs (rtx insn, int opnum)
9958 set = PATTERN (insn);
9959 if (GET_CODE (set) == PARALLEL)
9960 set = XVECEXP (set, 0, 0);
9961 gcc_assert (GET_CODE (set) == SET);
9962 mem = XEXP (set, opnum);
9963 while (SUBREG_P (mem))
9964 mem = SUBREG_REG (mem);
9965 gcc_assert (MEM_P (mem));
9966 return volatile_ok || !MEM_VOLATILE_P (mem);
9969 /* Return false if INSN contains a MEM with a non-default address space. */
9971 ix86_check_no_addr_space (rtx insn)
9973 subrtx_var_iterator::array_type array;
9974 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
9977 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
9983 /* Initialize the table of extra 80387 mathematical constants. */
9986 init_ext_80387_constants (void)
9988 static const char * cst[5] =
9990 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9991 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9992 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9993 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9994 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9998 for (i = 0; i < 5; i++)
10000 real_from_string (&ext_80387_constants_table[i], cst[i]);
10001 /* Ensure each constant is rounded to XFmode precision. */
10002 real_convert (&ext_80387_constants_table[i],
10003 XFmode, &ext_80387_constants_table[i]);
10006 ext_80387_constants_init = 1;
10009 /* Return non-zero if the constant is something that
10010 can be loaded with a special instruction. */
10013 standard_80387_constant_p (rtx x)
10015 machine_mode mode = GET_MODE (x);
10017 const REAL_VALUE_TYPE *r;
10019 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10022 if (x == CONST0_RTX (mode))
10024 if (x == CONST1_RTX (mode))
10027 r = CONST_DOUBLE_REAL_VALUE (x);
10029 /* For XFmode constants, try to find a special 80387 instruction when
10030 optimizing for size or on those CPUs that benefit from them. */
10032 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10036 if (! ext_80387_constants_init)
10037 init_ext_80387_constants ();
10039 for (i = 0; i < 5; i++)
10040 if (real_identical (r, &ext_80387_constants_table[i]))
10044 /* A load of the constant -0.0 or -1.0 will be split into an
10045 fldz;fchs or fld1;fchs sequence. */
10046 if (real_isnegzero (r))
10048 if (real_identical (r, &dconstm1))
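/* Illustrative sketch, not part of the compiler: the nonzero return
   values of standard_80387_constant_p map onto x87 loads roughly as
   1 -> fldz, 2 -> fld1, 3..7 -> fldlg2/fldln2/fldl2e/fldl2t/fldpi,
   8 -> fldz;fchs (-0.0) and 9 -> fld1;fchs (-1.0).  */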
10054 /* Return the opcode of the special instruction to be used to load the constant X. */
10058 standard_80387_constant_opcode (rtx x)
10060 switch (standard_80387_constant_p (x))
10080 gcc_unreachable ();
10084 /* Return the CONST_DOUBLE representing the 80387 constant that is
10085 loaded by the specified special instruction. The argument IDX
10086 matches the return value from standard_80387_constant_p. */
10089 standard_80387_constant_rtx (int idx)
10093 if (! ext_80387_constants_init)
10094 init_ext_80387_constants ();
10107 gcc_unreachable ();
10110 return const_double_from_real_value (ext_80387_constants_table[i],
10114 /* Return 1 if X is all-bits-zero and 2 if X is all-bits-one
10115 in a supported SSE/AVX vector mode. */
10118 standard_sse_constant_p (rtx x, machine_mode pred_mode)
10125 mode = GET_MODE (x);
10127 if (x == const0_rtx || const0_operand (x, mode))
10130 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10132 /* VOIDmode integer constant, get mode from the predicate. */
10133 if (mode == VOIDmode)
10136 switch (GET_MODE_SIZE (mode))
10139 if (TARGET_AVX512F)
10152 gcc_unreachable ();
10161 /* Return the opcode of the special instruction to be used to load
10162 the constant operands[1] into operands[0]. */
10165 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
10168 rtx x = operands[1];
10170 gcc_assert (TARGET_SSE);
10172 mode = GET_MODE (x);
10174 if (x == const0_rtx || const0_operand (x, mode))
10176 switch (get_attr_mode (insn))
10179 if (!EXT_REX_SSE_REG_P (operands[0]))
10180 return "%vpxor\t%0, %d0";
10184 if (EXT_REX_SSE_REG_P (operands[0]))
10185 return (TARGET_AVX512VL
10186 ? "vpxord\t%x0, %x0, %x0"
10187 : "vpxord\t%g0, %g0, %g0");
10188 return "vpxor\t%x0, %x0, %x0";
10191 if (!EXT_REX_SSE_REG_P (operands[0]))
10192 return "%vxorpd\t%0, %d0";
10196 if (!EXT_REX_SSE_REG_P (operands[0]))
10197 return "vxorpd\t%x0, %x0, %x0";
10198 else if (TARGET_AVX512DQ)
10199 return (TARGET_AVX512VL
10200 ? "vxorpd\t%x0, %x0, %x0"
10201 : "vxorpd\t%g0, %g0, %g0");
10203 return (TARGET_AVX512VL
10204 ? "vpxorq\t%x0, %x0, %x0"
10205 : "vpxorq\t%g0, %g0, %g0");
10208 if (!EXT_REX_SSE_REG_P (operands[0]))
10209 return "%vxorps\t%0, %d0";
10213 if (!EXT_REX_SSE_REG_P (operands[0]))
10214 return "vxorps\t%x0, %x0, %x0";
10215 else if (TARGET_AVX512DQ)
10216 return (TARGET_AVX512VL
10217 ? "vxorps\t%x0, %x0, %x0"
10218 : "vxorps\t%g0, %g0, %g0");
10220 return (TARGET_AVX512VL
10221 ? "vpxord\t%x0, %x0, %x0"
10222 : "vpxord\t%g0, %g0, %g0");
10225 gcc_unreachable ();
10228 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10230 enum attr_mode insn_mode = get_attr_mode (insn);
10237 gcc_assert (TARGET_AVX512F);
10238 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10243 gcc_assert (TARGET_AVX2);
10248 gcc_assert (TARGET_SSE2);
10249 if (!EXT_REX_SSE_REG_P (operands[0]))
10251 ? "vpcmpeqd\t%0, %0, %0"
10252 : "pcmpeqd\t%0, %0");
10253 else if (TARGET_AVX512VL)
10254 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
10256 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10259 gcc_unreachable ();
10263 gcc_unreachable ();
10266 /* Returns true if INSN can be transformed from a memory load
10267 to a supported FP constant load. */
10270 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
10272 rtx src = find_constant_src (insn);
10274 gcc_assert (REG_P (dst));
10277 || (SSE_REGNO_P (REGNO (dst))
10278 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
10279 || (STACK_REGNO_P (REGNO (dst))
10280 && standard_80387_constant_p (src) < 1))
10286 /* Returns true if OP contains a symbol reference */
10289 symbolic_reference_mentioned_p (rtx op)
10294 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10297 fmt = GET_RTX_FORMAT (GET_CODE (op));
10298 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10304 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10305 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10309 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10316 /* Return true if it is appropriate to emit `ret' instructions in the
10317 body of a function. Do this only if the epilogue is simple, needing a
10318 couple of insns. Prior to reloading, we can't tell how many registers
10319 must be saved, so return false then. Return false if there is no frame
10320 marker to de-allocate. */
10323 ix86_can_use_return_insn_p (void)
10325 if (ix86_function_naked (current_function_decl))
10328 /* Don't use `ret' instruction in interrupt handler. */
10329 if (! reload_completed
10330 || frame_pointer_needed
10331 || cfun->machine->func_type != TYPE_NORMAL)
10334 /* Don't allow more than 32k pop, since that's all we can do
10335 with one instruction. */
10336 if (crtl->args.pops_args && crtl->args.size >= 32768)
10339 struct ix86_frame &frame = cfun->machine->frame;
10340 return (frame.stack_pointer_offset == UNITS_PER_WORD
10341 && (frame.nregs + frame.nsseregs) == 0);
10344 /* Value should be nonzero if functions must have frame pointers.
10345 Zero means the frame pointer need not be set up (and parms may
10346 be accessed via the stack pointer) in functions that seem suitable. */
10349 ix86_frame_pointer_required (void)
10351 /* If we accessed previous frames, then the generated code expects
10352 to be able to access the saved ebp value in our frame. */
10353 if (cfun->machine->accesses_prev_frame)
10356 /* Several x86 OSes need a frame pointer for other reasons,
10357 usually pertaining to setjmp. */
10358 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10361 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
10362 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10365 /* Win64 SEH: very large frames need a frame pointer, as the maximum
10366 stack allocation is 4GB. */
10367 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10370 /* SSE saves require frame-pointer when stack is misaligned. */
10371 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
10374 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10375 turns off the frame pointer by default. Turn it back on now if
10376 we've not got a leaf function. */
10377 if (TARGET_OMIT_LEAF_FRAME_POINTER
10379 || ix86_current_function_calls_tls_descriptor))
10382 if (crtl->profile && !flag_fentry)
10388 /* Record that the current function accesses previous call frames. */
10391 ix86_setup_frame_addresses (void)
10393 cfun->machine->accesses_prev_frame = 1;
10396 #ifndef USE_HIDDEN_LINKONCE
10397 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10398 # define USE_HIDDEN_LINKONCE 1
10400 # define USE_HIDDEN_LINKONCE 0
10404 /* Label count for call and return thunks. It is used to make unique
10405 labels in call and return thunks. */
10406 static int indirectlabelno;
10408 /* True if call thunk function is needed. */
10409 static bool indirect_thunk_needed = false;
10411 /* Bit masks of integer registers, which contain branch target, used
10412 by call thunk functions. */
10413 static int indirect_thunks_used;
10415 /* True if return thunk function is needed. */
10416 static bool indirect_return_needed = false;
10418 /* True if return thunk function via CX is needed. */
10419 static bool indirect_return_via_cx;
10421 #ifndef INDIRECT_LABEL
10422 # define INDIRECT_LABEL "LIND"
10425 /* Indicate what prefix is needed for an indirect branch. */
10426 enum indirect_thunk_prefix
10428 indirect_thunk_prefix_none,
10429 indirect_thunk_prefix_nt
10432 /* Return the prefix needed for an indirect branch INSN. */
10434 enum indirect_thunk_prefix
10435 indirect_thunk_need_prefix (rtx_insn *insn)
10437 enum indirect_thunk_prefix need_prefix;
10438 if ((cfun->machine->indirect_branch_type
10439 == indirect_branch_thunk_extern)
10440 && ix86_notrack_prefixed_insn_p (insn))
10442 /* NOTRACK prefix is only used with external thunk so that it
10443 can be properly updated to support CET at run-time. */
10444 need_prefix = indirect_thunk_prefix_nt;
10447 need_prefix = indirect_thunk_prefix_none;
10448 return need_prefix;
10451 /* Fills in the label name that should be used for the indirect thunk. */
10454 indirect_thunk_name (char name[32], unsigned int regno,
10455 enum indirect_thunk_prefix need_prefix,
10458 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
10459 gcc_unreachable ();
10461 if (USE_HIDDEN_LINKONCE)
10463 const char *prefix;
10465 if (need_prefix == indirect_thunk_prefix_nt
10466 && regno != INVALID_REGNUM)
10468 /* The NOTRACK prefix is only used with an external thunk via
10469 register, so that the NOTRACK prefix can be added to an indirect
10470 branch via register to support CET at run-time. */
10476 const char *ret = ret_p ? "return" : "indirect";
10478 if (regno != INVALID_REGNUM)
10480 const char *reg_prefix;
10481 if (LEGACY_INT_REGNO_P (regno))
10482 reg_prefix = TARGET_64BIT ? "r" : "e";
10485 sprintf (name, "__x86_%s_thunk%s_%s%s",
10486 ret, prefix, reg_prefix, reg_names[regno]);
10489 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
10493 if (regno != INVALID_REGNUM)
10494 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
10498 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
10500 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
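/* Illustrative sketch, not part of the compiler: with USE_HIDDEN_LINKONCE
   this yields names such as __x86_indirect_thunk (address on the stack),
   __x86_indirect_thunk_rax (address in %rax) and __x86_return_thunk for
   the return case.  */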
10505 /* Output a call and return thunk for indirect branch. If REGNO != -1,
10506 the function address is in REGNO and the call and return thunk looks like:

	call	L2
   L1:
	pause
	lfence
	jmp	L1
   L2:
	mov	%REG, (%sp)
	ret

10517 Otherwise, the function address is on the top of stack and the
10518 call and return thunk looks like:

	call	L2
   L1:
	pause
	lfence
	jmp	L1
   L2:
10526 	lea	WORD_SIZE(%sp), %sp
	ret
 */
10531 output_indirect_thunk (unsigned int regno)
10533 char indirectlabel1[32];
10534 char indirectlabel2[32];
10536 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
10537 indirectlabelno++);
10538 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
10539 indirectlabelno++);
10542 fputs ("\tcall\t", asm_out_file);
10543 assemble_name_raw (asm_out_file, indirectlabel2);
10544 fputc ('\n', asm_out_file);
10546 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
10548 /* AMD and Intel CPUs each prefer a different instruction as a loop
10549 filler; using both pause + lfence is a compromise solution. */
10550 fprintf (asm_out_file, "\tpause\n\tlfence\n");
10553 fputs ("\tjmp\t", asm_out_file);
10554 assemble_name_raw (asm_out_file, indirectlabel1);
10555 fputc ('\n', asm_out_file);
10557 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
10559 /* The above call insn pushed a word to stack. Adjust CFI info. */
10560 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
10562 if (! dwarf2out_do_cfi_asm ())
10564 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
10565 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
10566 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
10567 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
10569 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
10570 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
10571 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
10572 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
10573 dwarf2out_emit_cfi (xcfi);
10576 if (regno != INVALID_REGNUM)
10580 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
10581 xops[1] = gen_rtx_REG (word_mode, regno);
10582 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
10588 xops[0] = stack_pointer_rtx;
10589 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10590 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
10593 fputs ("\tret\n", asm_out_file);
10596 /* Output a function with a call and return thunk for indirect branch.
10597 If REGNO != INVALID_REGNUM, the function address is in REGNO.
10598 Otherwise, the function address is on the top of stack. Thunk is
10599 used for function return if RET_P is true. */
10602 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
10603 unsigned int regno, bool ret_p)
10608 /* Create __x86_indirect_thunk. */
10609 indirect_thunk_name (name, regno, need_prefix, ret_p);
10610 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
10611 get_identifier (name),
10612 build_function_type_list (void_type_node, NULL_TREE));
10613 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
10614 NULL_TREE, void_type_node);
10615 TREE_PUBLIC (decl) = 1;
10616 TREE_STATIC (decl) = 1;
10617 DECL_IGNORED_P (decl) = 1;
10622 switch_to_section (darwin_sections[picbase_thunk_section]);
10623 fputs ("\t.weak_definition\t", asm_out_file);
10624 assemble_name (asm_out_file, name);
10625 fputs ("\n\t.private_extern\t", asm_out_file);
10626 assemble_name (asm_out_file, name);
10627 putc ('\n', asm_out_file);
10628 ASM_OUTPUT_LABEL (asm_out_file, name);
10629 DECL_WEAK (decl) = 1;
10633 if (USE_HIDDEN_LINKONCE)
10635 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
10637 targetm.asm_out.unique_section (decl, 0);
10638 switch_to_section (get_named_section (decl, NULL, 0));
10640 targetm.asm_out.globalize_label (asm_out_file, name);
10641 fputs ("\t.hidden\t", asm_out_file);
10642 assemble_name (asm_out_file, name);
10643 putc ('\n', asm_out_file);
10644 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
10648 switch_to_section (text_section);
10649 ASM_OUTPUT_LABEL (asm_out_file, name);
10652 DECL_INITIAL (decl) = make_node (BLOCK);
10653 current_function_decl = decl;
10654 allocate_struct_function (decl, false);
10655 init_function_start (decl);
10656 /* We're about to hide the function body from callees of final_* by
10657 emitting it directly; tell them we're a thunk, if they care. */
10658 cfun->is_thunk = true;
10659 first_function_block_is_cold = false;
10660 /* Make sure unwind info is emitted for the thunk if needed. */
10661 final_start_function (emit_barrier (), asm_out_file, 1);
10663 output_indirect_thunk (regno);
10665 final_end_function ();
10666 init_insn_lengths ();
10667 free_after_compilation (cfun);
10669 current_function_decl = NULL;
10672 static int pic_labels_used;
10674 /* Fills in the label name that should be used for a pc thunk for
10675 the given register. */
10678 get_pc_thunk_name (char name[32], unsigned int regno)
10680 gcc_assert (!TARGET_64BIT);
10682 if (USE_HIDDEN_LINKONCE)
10683 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
10685 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
10689 /* This function generates code for -fpic that loads %ebx with
10690 the return address of the caller and then returns. */
10693 ix86_code_end (void)
10696 unsigned int regno;
10698 if (indirect_return_needed)
10699 output_indirect_thunk_function (indirect_thunk_prefix_none,
10700 INVALID_REGNUM, true);
10701 if (indirect_return_via_cx)
10702 output_indirect_thunk_function (indirect_thunk_prefix_none,
10704 if (indirect_thunk_needed)
10705 output_indirect_thunk_function (indirect_thunk_prefix_none,
10706 INVALID_REGNUM, false);
10708 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
10710 unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
10711 if ((indirect_thunks_used & (1 << i)))
10712 output_indirect_thunk_function (indirect_thunk_prefix_none,
10716 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
10721 if ((indirect_thunks_used & (1 << regno)))
10722 output_indirect_thunk_function (indirect_thunk_prefix_none,
10725 if (!(pic_labels_used & (1 << regno)))
10728 get_pc_thunk_name (name, regno);
10730 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
10731 get_identifier (name),
10732 build_function_type_list (void_type_node, NULL_TREE));
10733 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
10734 NULL_TREE, void_type_node);
10735 TREE_PUBLIC (decl) = 1;
10736 TREE_STATIC (decl) = 1;
10737 DECL_IGNORED_P (decl) = 1;
10742 switch_to_section (darwin_sections[picbase_thunk_section]);
10743 fputs ("\t.weak_definition\t", asm_out_file);
10744 assemble_name (asm_out_file, name);
10745 fputs ("\n\t.private_extern\t", asm_out_file);
10746 assemble_name (asm_out_file, name);
10747 putc ('\n', asm_out_file);
10748 ASM_OUTPUT_LABEL (asm_out_file, name);
10749 DECL_WEAK (decl) = 1;
10753 if (USE_HIDDEN_LINKONCE)
10755 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
10757 targetm.asm_out.unique_section (decl, 0);
10758 switch_to_section (get_named_section (decl, NULL, 0));
10760 targetm.asm_out.globalize_label (asm_out_file, name);
10761 fputs ("\t.hidden\t", asm_out_file);
10762 assemble_name (asm_out_file, name);
10763 putc ('\n', asm_out_file);
10764 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
10768 switch_to_section (text_section);
10769 ASM_OUTPUT_LABEL (asm_out_file, name);
10772 DECL_INITIAL (decl) = make_node (BLOCK);
10773 current_function_decl = decl;
10774 allocate_struct_function (decl, false);
10775 init_function_start (decl);
10776 /* We're about to hide the function body from callees of final_* by
10777 emitting it directly; tell them we're a thunk, if they care. */
10778 cfun->is_thunk = true;
10779 first_function_block_is_cold = false;
10780 /* Make sure unwind info is emitted for the thunk if needed. */
10781 final_start_function (emit_barrier (), asm_out_file, 1);
10783 /* Pad stack IP move with 4 instructions (two NOPs count
10784 as one instruction). */
10785 if (TARGET_PAD_SHORT_FUNCTION)
10790 fputs ("\tnop\n", asm_out_file);
10793 xops[0] = gen_rtx_REG (Pmode, regno);
10794 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10795 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
10796 output_asm_insn ("%!ret", NULL);
10797 final_end_function ();
10798 init_insn_lengths ();
10799 free_after_compilation (cfun);
10801 current_function_decl = NULL;
10804 if (flag_split_stack)
10805 file_end_indicate_split_stack ();
10808 /* Emit code for the SET_GOT patterns. */
10811 output_set_got (rtx dest, rtx label)
10817 if (TARGET_VXWORKS_RTP && flag_pic)
10819 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
10820 xops[2] = gen_rtx_MEM (Pmode,
10821 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
10822 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
10824 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
10825 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
10826 an unadorned address. */
10827 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
10828 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
10829 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
10833 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
10838 get_pc_thunk_name (name, REGNO (dest));
10839 pic_labels_used |= 1 << REGNO (dest);
10841 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
10842 xops[2] = gen_rtx_MEM (QImode, xops[2]);
10843 output_asm_insn ("%!call\t%X2", xops);
10846 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
10847 This is what will be referenced by the Mach-O PIC subsystem. */
10848 if (machopic_should_output_picbase_label () || !label)
10849 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
10851 /* When we are restoring the pic base at the site of a nonlocal label,
10852 and we decided to emit the pic base above, we will still output a
10853 local label used for calculating the correction offset (even though
10854 the offset will be 0 in that case). */
10856 targetm.asm_out.internal_label (asm_out_file, "L",
10857 CODE_LABEL_NUMBER (label));
10863 /* We don't need a pic base, we're not producing pic. */
10864 gcc_unreachable ();
10866 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
10867 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
10868 targetm.asm_out.internal_label (asm_out_file, "L",
10869 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
10873 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
10878 /* Generate a "push" pattern for input ARG. */
10883 struct machine_function *m = cfun->machine;
10885 if (m->fs.cfa_reg == stack_pointer_rtx)
10886 m->fs.cfa_offset += UNITS_PER_WORD;
10887 m->fs.sp_offset += UNITS_PER_WORD;
10889 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10890 arg = gen_rtx_REG (word_mode, REGNO (arg));
10892 return gen_rtx_SET (gen_rtx_MEM (word_mode,
10893 gen_rtx_PRE_DEC (Pmode,
10894 stack_pointer_rtx)),
10898 /* Generate a "pop" pattern for input ARG. */
10903 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10904 arg = gen_rtx_REG (word_mode, REGNO (arg));
10906 return gen_rtx_SET (arg,
10907 gen_rtx_MEM (word_mode,
10908 gen_rtx_POST_INC (Pmode,
10909 stack_pointer_rtx)));
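/* Illustrative sketch, not part of the compiler: for ARG = %bx the two
   helpers above produce

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bx))    ;; gen_push
     (set (reg:DI bx) (mem:DI (post_inc:DI (reg:DI sp))))   ;; gen_pop

   i.e. plain word-mode stack pushes and pops (word_mode is SImode for
   32-bit targets).  */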
10912 /* Return >= 0 if there is an unused call-clobbered register available
10913 for the entire function. */
10915 static unsigned int
10916 ix86_select_alt_pic_regnum (void)
10918 if (ix86_use_pseudo_pic_reg ())
10919 return INVALID_REGNUM;
10923 && !ix86_current_function_calls_tls_descriptor)
10926 /* Can't use the same register for both PIC and DRAP. */
10927 if (crtl->drap_reg)
10928 drap = REGNO (crtl->drap_reg);
10931 for (i = 2; i >= 0; --i)
10932 if (i != drap && !df_regs_ever_live_p (i))
10936 return INVALID_REGNUM;
10939 /* Return true if REGNO is used by the epilogue. */
10942 ix86_epilogue_uses (int regno)
10944 /* If there are no caller-saved registers, we preserve all registers,
10945 except for MMX and x87 registers which aren't supported when saving
10946 and restoring registers. Don't explicitly save SP register since
10947 it is always preserved. */
10948 return (epilogue_completed
10949 && cfun->machine->no_caller_saved_registers
10950 && !fixed_regs[regno]
10951 && !STACK_REGNO_P (regno)
10952 && !MMX_REGNO_P (regno));
10955 /* Return nonzero if register REGNO can be used as a scratch register
10959 ix86_hard_regno_scratch_ok (unsigned int regno)
10961 /* If there are no caller-saved registers, we can't use any register
10962 as a scratch register after epilogue and use REGNO as scratch
10963 register only if it has been used before, to avoid saving and restoring it.  */
10965 return (!cfun->machine->no_caller_saved_registers
10966 || (!epilogue_completed
10967 && df_regs_ever_live_p (regno)));
10970 /* Return TRUE if we need to save REGNO. */
10973 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
10975 /* If there are no caller-saved registers, we preserve all registers,
10976 except for MMX and x87 registers which aren't supported when saving
10977 and restoring registers. Don't explicitly save SP register since
10978 it is always preserved. */
10979 if (cfun->machine->no_caller_saved_registers)
10981 /* Don't preserve registers used for function return value. */
10982 rtx reg = crtl->return_rtx;
10985 unsigned int i = REGNO (reg);
10986 unsigned int nregs = REG_NREGS (reg);
10987 while (nregs-- > 0)
10988 if ((i + nregs) == regno)
10992 return (df_regs_ever_live_p (regno)
10993 && !fixed_regs[regno]
10994 && !STACK_REGNO_P (regno)
10995 && !MMX_REGNO_P (regno)
10996 && (regno != HARD_FRAME_POINTER_REGNUM
10997 || !frame_pointer_needed));
11000 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
11001 && pic_offset_table_rtx)
11003 if (ix86_use_pseudo_pic_reg ())
11005 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
11006 _mcount in prologue. */
11007 if (!TARGET_64BIT && flag_pic && crtl->profile)
11010 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11012 || crtl->calls_eh_return
11013 || crtl->uses_const_pool
11014 || cfun->has_nonlocal_label)
11015 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
11018 if (crtl->calls_eh_return && maybe_eh_return)
11023 unsigned test = EH_RETURN_DATA_REGNO (i);
11024 if (test == INVALID_REGNUM)
11031 if (ignore_outlined && cfun->machine->call_ms2sysv)
11033 unsigned count = cfun->machine->call_ms2sysv_extra_regs
11034 + xlogue_layout::MIN_REGS;
11035 if (xlogue_layout::is_stub_managed_reg (regno, count))
11040 && regno == REGNO (crtl->drap_reg)
11041 && !cfun->machine->no_drap_save_restore)
11044 return (df_regs_ever_live_p (regno)
11045 && !call_used_regs[regno]
11046 && !fixed_regs[regno]
11047 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
11050 /* Return number of saved general purpose registers.  */
11053 ix86_nsaved_regs (void)
11058 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11059 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11064 /* Return number of saved SSE registers. */
11067 ix86_nsaved_sseregs (void)
11072 if (!TARGET_64BIT_MS_ABI)
11074 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11075 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11080 /* Given FROM and TO register numbers, say whether this elimination is
11081 allowed. If stack alignment is needed, we can only replace argument
11082 pointer with hard frame pointer, or replace frame pointer with stack
11083 pointer. Otherwise, frame pointer elimination is automatically
11084 handled and all other eliminations are valid. */
11087 ix86_can_eliminate (const int from, const int to)
11089 if (stack_realign_fp)
11090 return ((from == ARG_POINTER_REGNUM
11091 && to == HARD_FRAME_POINTER_REGNUM)
11092 || (from == FRAME_POINTER_REGNUM
11093 && to == STACK_POINTER_REGNUM));
11095 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
11098 /* Return the offset between two registers, one to be eliminated, and the other
11099 its replacement, at the start of a routine. */
11102 ix86_initial_elimination_offset (int from, int to)
11104 struct ix86_frame &frame = cfun->machine->frame;
11106 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11107 return frame.hard_frame_pointer_offset;
11108 else if (from == FRAME_POINTER_REGNUM
11109 && to == HARD_FRAME_POINTER_REGNUM)
11110 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
11113 gcc_assert (to == STACK_POINTER_REGNUM);
11115 if (from == ARG_POINTER_REGNUM)
11116 return frame.stack_pointer_offset;
11118 gcc_assert (from == FRAME_POINTER_REGNUM);
11119 return frame.stack_pointer_offset - frame.frame_pointer_offset;
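/* Worked example, with made-up offsets: if frame.stack_pointer_offset
   is 56, frame.frame_pointer_offset is 24 and
   frame.hard_frame_pointer_offset is 16, then eliminating ARG_POINTER
   to HARD_FRAME_POINTER yields 16, FRAME_POINTER to HARD_FRAME_POINTER
   yields 16 - 24 = -8, and FRAME_POINTER to STACK_POINTER yields
   56 - 24 = 32.  */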
11123 /* In a dynamically-aligned function, we can't know the offset from
11124 stack pointer to frame pointer, so we must ensure that setjmp
11125 eliminates fp against the hard fp (%ebp) rather than trying to
11126 index from %esp up to the top of the frame across a gap that is
11127 of unknown (at compile-time) size. */
11129 ix86_builtin_setjmp_frame_value (void)
11131 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11134 /* Emit a warning for unsupported msabi-to-sysv pro/epilogues.  */
11135 static void warn_once_call_ms2sysv_xlogues (const char *feature)
11137 static bool warned_once = false;
11140 warning (0, "-mcall-ms2sysv-xlogues is not compatible with %s",
11142 warned_once = true;
11146 /* Return the probing interval for -fstack-clash-protection. */
11148 static HOST_WIDE_INT
11149 get_probe_interval (void)
11151 if (flag_stack_clash_protection)
11152 return (HOST_WIDE_INT_1U
11153 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
11155 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
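/* For example, assuming the usual default exponent of 12 for both the
   stack-clash-protection-probe-interval parameter and
   STACK_CHECK_PROBE_INTERVAL_EXP, either branch returns
   1 << 12 = 4096 bytes, i.e. one probe per 4 KiB page.  */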
11158 /* When using -fsplit-stack, the allocation routines set a field in
11159 the TCB to the bottom of the stack plus this much space, measured in bytes.  */
11162 #define SPLIT_STACK_AVAILABLE 256
11164 /* Fill the ix86_frame structure describing the frame of the currently compiled function.  */
11167 ix86_compute_frame_layout (void)
11169 struct ix86_frame *frame = &cfun->machine->frame;
11170 struct machine_function *m = cfun->machine;
11171 unsigned HOST_WIDE_INT stack_alignment_needed;
11172 HOST_WIDE_INT offset;
11173 unsigned HOST_WIDE_INT preferred_alignment;
11174 HOST_WIDE_INT size = get_frame_size ();
11175 HOST_WIDE_INT to_allocate;
11177 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
11178 * ms_abi functions that call a sysv function. We now need to prune away
11179 * cases where it should be disabled. */
11180 if (TARGET_64BIT && m->call_ms2sysv)
11182 gcc_assert (TARGET_64BIT_MS_ABI);
11183 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
11184 gcc_assert (!TARGET_SEH);
11185 gcc_assert (TARGET_SSE);
11186 gcc_assert (!ix86_using_red_zone ());
11188 if (crtl->calls_eh_return)
11190 gcc_assert (!reload_completed);
11191 m->call_ms2sysv = false;
11192 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
11195 else if (ix86_static_chain_on_stack)
11197 gcc_assert (!reload_completed);
11198 m->call_ms2sysv = false;
11199 warn_once_call_ms2sysv_xlogues ("static call chains");
11202 /* Finally, compute which registers the stub will manage. */
11205 unsigned count = xlogue_layout::count_stub_managed_regs ();
11206 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
11207 m->call_ms2sysv_pad_in = 0;
11211 frame->nregs = ix86_nsaved_regs ();
11212 frame->nsseregs = ix86_nsaved_sseregs ();
11214 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
11215 except for function prologues, leaf functions, and when the default
11216 incoming stack boundary is overridden at the command line or via the
11217 force_align_arg_pointer attribute.
11219 Darwin's ABI specifies 128-bit alignment for both 32- and 64-bit variants
11220 at call sites, including profile function calls.  */
11222 if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
11223 && crtl->preferred_stack_boundary < 128)
11224 && (!crtl->is_leaf || cfun->calls_alloca != 0
11225 || ix86_current_function_calls_tls_descriptor
11226 || (TARGET_MACHO && crtl->profile)
11227 || ix86_incoming_stack_boundary < 128))
11229 crtl->preferred_stack_boundary = 128;
11230 crtl->stack_alignment_needed = 128;
11233 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11234 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11236 gcc_assert (!size || stack_alignment_needed);
11237 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11238 gcc_assert (preferred_alignment <= stack_alignment_needed);
11240 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
11241 gcc_assert (TARGET_64BIT || !frame->nsseregs);
11242 if (TARGET_64BIT && m->call_ms2sysv)
11244 gcc_assert (stack_alignment_needed >= 16);
11245 gcc_assert (!frame->nsseregs);
11248 /* For SEH we have to limit the amount of code movement into the prologue.
11249 At present we do this via a BLOCKAGE, at which point there's very little
11250 scheduling that can be done, which means that there's very little point
11251 in doing anything except PUSHs. */
11253 m->use_fast_prologue_epilogue = false;
11254 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
11256 int count = frame->nregs;
11257 struct cgraph_node *node = cgraph_node::get (current_function_decl);
11259 /* The fast prologue uses move instead of push to save registers. This
11260 is significantly longer, but also executes faster as modern hardware
11261 can execute the moves in parallel, but can't do that for push/pop.
11263 Be careful about choosing what prologue to emit: when the function
11264 takes many instructions to execute, we may use the slow version, as
11265 well as when the function is known to be outside a hot spot (this is
11266 known with feedback only).  Weight the size of the function by the
11267 number of registers to save, as it is cheap to use one or two push
11268 instructions but very slow to use many of them. */
11270 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11271 if (node->frequency < NODE_FREQUENCY_NORMAL
11272 || (flag_branch_probabilities
11273 && node->frequency < NODE_FREQUENCY_HOT))
11274 m->use_fast_prologue_epilogue = false;
11276 m->use_fast_prologue_epilogue
11277 = !expensive_function_p (count);
11280 frame->save_regs_using_mov
11281 = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
11282 /* If static stack checking is enabled and done with probes,
11283 the registers need to be saved before allocating the frame. */
11284 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11286 /* Skip return address and error code in exception handler. */
11287 offset = INCOMING_FRAME_SP_OFFSET;
11289 /* Skip pushed static chain. */
11290 if (ix86_static_chain_on_stack)
11291 offset += UNITS_PER_WORD;
11293 /* Skip saved base pointer. */
11294 if (frame_pointer_needed)
11295 offset += UNITS_PER_WORD;
11296 frame->hfp_save_offset = offset;
11298 /* The traditional frame pointer location is at the top of the frame. */
11299 frame->hard_frame_pointer_offset = offset;
11301 /* Register save area */
11302 offset += frame->nregs * UNITS_PER_WORD;
11303 frame->reg_save_offset = offset;
11305 /* On SEH target, registers are pushed just before the frame pointer location.  */
11308 frame->hard_frame_pointer_offset = offset;
11310 /* Calculate the size of the va-arg area (not including padding, if any). */
11311 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11313 /* Also adjust stack_realign_offset for the largest alignment of
11314 stack slot actually used. */
11315 if (stack_realign_fp
11316 || (cfun->machine->max_used_stack_alignment != 0
11317 && (offset % cfun->machine->max_used_stack_alignment) != 0))
11319 /* We may need a 16-byte aligned stack for the remainder of the
11320 register save area, but the stack frame for the local function
11321 may require a greater alignment if using AVX/AVX2/AVX512.  In order
11322 to avoid wasting space, we first calculate the space needed for
11323 the rest of the register saves, add that to the stack pointer,
11324 and then realign the stack to the boundary of the start of the
11325 frame for the local function. */
11326 HOST_WIDE_INT space_needed = 0;
11327 HOST_WIDE_INT sse_reg_space_needed = 0;
11331 if (m->call_ms2sysv)
11333 m->call_ms2sysv_pad_in = 0;
11334 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
11337 else if (frame->nsseregs)
11338 /* The only ABI that has saved SSE registers (Win64) also has a
11339 16-byte aligned default stack. However, many programs violate
11340 the ABI, and Wine64 forces stack realignment to compensate. */
11341 space_needed = frame->nsseregs * 16;
11343 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
11345 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
11346 we round to be pedantic. */
11347 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
11350 space_needed = frame->va_arg_size;
11352 /* Record the allocation size required prior to the realignment AND. */
11353 frame->stack_realign_allocate = space_needed;
11355 /* The re-aligned stack starts at frame->stack_realign_offset. Values
11356 before this point are not directly comparable with values below
11357 this point. Use sp_valid_at to determine if the stack pointer is
11358 valid for a given offset, fp_valid_at for the frame pointer, or
11359 choose_baseaddr to have a base register chosen for you.
11361 Note that the result of (frame->stack_realign_offset
11362 & (stack_alignment_needed - 1)) may not equal zero. */
11363 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
11364 frame->stack_realign_offset = offset - space_needed;
11365 frame->sse_reg_save_offset = frame->stack_realign_offset
11366 + sse_reg_space_needed;
11370 frame->stack_realign_offset = offset;
11372 if (TARGET_64BIT && m->call_ms2sysv)
11374 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
11375 offset += xlogue_layout::get_instance ().get_stack_space_used ();
11378 /* Align and set SSE register save area. */
11379 else if (frame->nsseregs)
11381 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
11382 required and the DRAP re-alignment boundary is at least 16 bytes,
11383 then we want the SSE register save area properly aligned. */
11384 if (ix86_incoming_stack_boundary >= 128
11385 || (stack_realign_drap && stack_alignment_needed >= 16))
11386 offset = ROUND_UP (offset, 16);
11387 offset += frame->nsseregs * 16;
11389 frame->sse_reg_save_offset = offset;
11390 offset += frame->va_arg_size;
11393 /* Align start of frame for local function. When a function call
11394 is removed, it may become a leaf function.  But if arguments may
11395 be passed on the stack, we need to align the stack when there is no tail call.  */
11397 if (m->call_ms2sysv
11398 || frame->va_arg_size != 0
11401 || (!crtl->tail_call_emit
11402 && cfun->machine->outgoing_args_on_stack)
11403 || cfun->calls_alloca
11404 || ix86_current_function_calls_tls_descriptor)
11405 offset = ROUND_UP (offset, stack_alignment_needed);
11407 /* Frame pointer points here. */
11408 frame->frame_pointer_offset = offset;
11412 /* Add outgoing arguments area. Can be skipped if we eliminated
11413 all the function calls as dead code.
11414 Skipping is however impossible when function calls alloca. Alloca
11415 expander assumes that last crtl->outgoing_args_size
11416 of stack frame are unused. */
11417 if (ACCUMULATE_OUTGOING_ARGS
11418 && (!crtl->is_leaf || cfun->calls_alloca
11419 || ix86_current_function_calls_tls_descriptor))
11421 offset += crtl->outgoing_args_size;
11422 frame->outgoing_arguments_size = crtl->outgoing_args_size;
11425 frame->outgoing_arguments_size = 0;
11427 /* Align stack boundary. Only needed if we're calling another function
11428 or using alloca. */
11429 if (!crtl->is_leaf || cfun->calls_alloca
11430 || ix86_current_function_calls_tls_descriptor)
11431 offset = ROUND_UP (offset, preferred_alignment);
11433 /* We've reached end of stack frame. */
11434 frame->stack_pointer_offset = offset;
11436 /* Size prologue needs to allocate. */
11437 to_allocate = offset - frame->sse_reg_save_offset;
11439 if ((!to_allocate && frame->nregs <= 1)
11440 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
11441 /* If stack clash probing needs a loop, then it needs a
11442 scratch register. But the returned register is only guaranteed
11443 to be safe to use after register saves are complete. So if
11444 stack clash protections are enabled and the allocated frame is
11445 larger than the probe interval, then use pushes to save
11446 callee saved registers. */
11447 || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
11448 frame->save_regs_using_mov = false;
11450 if (ix86_using_red_zone ()
11451 && crtl->sp_is_unchanging
11453 && !ix86_pc_thunk_call_expanded
11454 && !ix86_current_function_calls_tls_descriptor)
11456 frame->red_zone_size = to_allocate;
11457 if (frame->save_regs_using_mov)
11458 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11459 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11460 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11463 frame->red_zone_size = 0;
11464 frame->stack_pointer_offset -= frame->red_zone_size;
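/* Worked example, with made-up numbers: if to_allocate is 40 and two
   general registers are saved using moves, red_zone_size becomes
   40 + 2 * 8 = 56 bytes on x86-64.  That is below the usual
   RED_ZONE_SIZE - RED_ZONE_RESERVE cap, so all 56 bytes live in the
   red zone and stack_pointer_offset shrinks accordingly, meaning the
   prologue need not adjust %rsp for them at all.  */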
11466 /* The SEH frame pointer location is near the bottom of the frame.
11467 This is enforced by the fact that the difference between the
11468 stack pointer and the frame pointer is limited to 240 bytes in
11469 the unwind data structure. */
11472 HOST_WIDE_INT diff;
12474 /* If we can leave the frame pointer where it is, do so; this also returns
11475 the establisher frame for __builtin_frame_address (0). */
11476 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11477 if (diff <= SEH_MAX_FRAME_SIZE
11478 && (diff > 240 || (diff & 15) != 0)
11479 && !crtl->accesses_prior_frames)
11481 /* Ideally we'd determine what portion of the local stack frame
11482 (within the constraint of the lowest 240) is most heavily used.
11483 But without that complication, simply bias the frame pointer
11484 by 128 bytes so as to maximize the amount of the local stack
11485 frame that is addressable with 8-bit offsets. */
11486 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11491 /* This is semi-inlined memory_address_length, but simplified
11492 since we know that we're always dealing with reg+offset, and
11493 to avoid having to create and discard all that rtl. */
11496 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11502 /* EBP and R13 cannot be encoded without an offset. */
11503 len = (regno == BP_REG || regno == R13_REG);
11505 else if (IN_RANGE (offset, -128, 127))
11508 /* ESP and R12 must be encoded with a SIB byte. */
11509 if (regno == SP_REG || regno == R12_REG)
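/* Worked examples of the length computation above: (%rax) needs no
   extra bytes, so the result is 0; (%rbp) or (%r13) needs a disp8 even
   for a zero offset, giving 1; -8(%rsp) needs a disp8 plus a SIB byte,
   giving 2; and 4096(%rax) needs a disp32, giving 4.  */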
11515 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
11516 the frame save area. The register is saved at CFA - CFA_OFFSET. */
11519 sp_valid_at (HOST_WIDE_INT cfa_offset)
11521 const struct machine_frame_state &fs = cfun->machine->fs;
11522 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
11524 /* Validate that the cfa_offset isn't in a "no-man's land". */
11525 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
11528 return fs.sp_valid;
11531 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
11532 the frame save area. The register is saved at CFA - CFA_OFFSET. */
11535 fp_valid_at (HOST_WIDE_INT cfa_offset)
11537 const struct machine_frame_state &fs = cfun->machine->fs;
11538 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
11540 /* Validate that the cfa_offset isn't in a "no-man's land". */
11541 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
11544 return fs.fp_valid;
11547 /* Choose a base register based upon alignment requested, speed, and/or offset.  */
11551 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
11552 HOST_WIDE_INT &base_offset,
11553 unsigned int align_requested, unsigned int *align)
11555 const struct machine_function *m = cfun->machine;
11556 unsigned int hfp_align;
11557 unsigned int drap_align;
11558 unsigned int sp_align;
11559 bool hfp_ok = fp_valid_at (cfa_offset);
11560 bool drap_ok = m->fs.drap_valid;
11561 bool sp_ok = sp_valid_at (cfa_offset);
11563 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
11565 /* Filter out any registers that don't meet the requested alignment requirement.  */
11567 if (align_requested)
11569 if (m->fs.realigned)
11570 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
11571 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
11572 notes (which we would need to use a realigned stack pointer),
11573 so disable on SEH targets. */
11574 else if (m->fs.sp_realigned)
11575 sp_align = crtl->stack_alignment_needed;
11577 hfp_ok = hfp_ok && hfp_align >= align_requested;
11578 drap_ok = drap_ok && drap_align >= align_requested;
11579 sp_ok = sp_ok && sp_align >= align_requested;
11582 if (m->use_fast_prologue_epilogue)
11584 /* Choose the base register most likely to allow the most scheduling
11585 opportunities. Generally FP is valid throughout the function,
11586 while DRAP must be reloaded within the epilogue. But choose either
11587 over the SP due to increased encoding size. */
11591 base_reg = hard_frame_pointer_rtx;
11592 base_offset = m->fs.fp_offset - cfa_offset;
11596 base_reg = crtl->drap_reg;
11597 base_offset = 0 - cfa_offset;
11601 base_reg = stack_pointer_rtx;
11602 base_offset = m->fs.sp_offset - cfa_offset;
11607 HOST_WIDE_INT toffset;
11608 int len = 16, tlen;
11610 /* Choose the base register with the smallest address encoding.
11611 With a tie, choose FP > DRAP > SP. */
11614 base_reg = stack_pointer_rtx;
11615 base_offset = m->fs.sp_offset - cfa_offset;
11616 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
11620 toffset = 0 - cfa_offset;
11621 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
11624 base_reg = crtl->drap_reg;
11625 base_offset = toffset;
11631 toffset = m->fs.fp_offset - cfa_offset;
11632 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
11635 base_reg = hard_frame_pointer_rtx;
11636 base_offset = toffset;
11642 /* Set the align return value. */
11645 if (base_reg == stack_pointer_rtx)
11647 else if (base_reg == crtl->drap_reg)
11648 *align = drap_align;
11649 else if (base_reg == hard_frame_pointer_rtx)
11650 *align = hfp_align;
11654 /* Return an RTX that points to CFA_OFFSET within the stack frame and
11655 the alignment of address. If ALIGN is non-null, it should point to
11656 an alignment value (in bits) that is preferred or zero and will
11657 receive the alignment of the base register that was selected,
11658 irrespective of whether or not CFA_OFFSET is a multiple of that
11659 alignment value.  If it is possible for the base register offset to be
11660 non-immediate, then SCRATCH_REGNO should specify a scratch register to use.
11663 The valid base registers are taken from CFUN->MACHINE->FS. */
11666 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
11667 unsigned int scratch_regno = INVALID_REGNUM)
11669 rtx base_reg = NULL;
11670 HOST_WIDE_INT base_offset = 0;
11672 /* If a specific alignment is requested, try to get a base register
11673 with that alignment first. */
11674 if (align && *align)
11675 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
11678 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
11680 gcc_assert (base_reg != NULL);
11682 rtx base_offset_rtx = GEN_INT (base_offset);
11684 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
11686 gcc_assert (scratch_regno != INVALID_REGNUM);
11688 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11689 emit_move_insn (scratch_reg, base_offset_rtx);
11691 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
11694 return plus_constant (Pmode, base_reg, base_offset);
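/* Usage sketch, with a hypothetical scratch register choice: a caller
   that may face a base offset too large for a 32-bit immediate could
   write

       rtx addr = choose_baseaddr (cfa_offset, &align, R11_REG);
       rtx mem = gen_frame_mem (word_mode, addr);

   and would receive either base + constant or, for a huge offset,
   base + %r11 with the constant loaded into %r11 first.  */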
11697 /* Emit code to save registers in the prologue. */
11700 ix86_emit_save_regs (void)
11702 unsigned int regno;
11705 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
11706 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11708 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
11709 RTX_FRAME_RELATED_P (insn) = 1;
11713 /* Emit a single register save at CFA - CFA_OFFSET. */
11716 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
11717 HOST_WIDE_INT cfa_offset)
11719 struct machine_function *m = cfun->machine;
11720 rtx reg = gen_rtx_REG (mode, regno);
11721 rtx mem, addr, base, insn;
11722 unsigned int align = GET_MODE_ALIGNMENT (mode);
11724 addr = choose_baseaddr (cfa_offset, &align);
11725 mem = gen_frame_mem (mode, addr);
11727 /* The location alignment depends upon the base register.  */
11728 align = MIN (GET_MODE_ALIGNMENT (mode), align);
11729 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
11730 set_mem_align (mem, align);
11732 insn = emit_insn (gen_rtx_SET (mem, reg));
11733 RTX_FRAME_RELATED_P (insn) = 1;
11736 if (GET_CODE (base) == PLUS)
11737 base = XEXP (base, 0);
11738 gcc_checking_assert (REG_P (base));
11740 /* When saving registers into a re-aligned local stack frame, avoid
11741 any tricky guessing by dwarf2out. */
11742 if (m->fs.realigned)
11744 gcc_checking_assert (stack_realign_drap);
11746 if (regno == REGNO (crtl->drap_reg))
11748 /* A bit of a hack. We force the DRAP register to be saved in
11749 the re-aligned stack frame, which provides us with a copy
11750 of the CFA that will last past the prologue. Install it. */
11751 gcc_checking_assert (cfun->machine->fs.fp_valid);
11752 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11753 cfun->machine->fs.fp_offset - cfa_offset);
11754 mem = gen_rtx_MEM (mode, addr);
11755 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
11759 /* The frame pointer is a stable reference within the
11760 aligned frame. Use it. */
11761 gcc_checking_assert (cfun->machine->fs.fp_valid);
11762 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11763 cfun->machine->fs.fp_offset - cfa_offset);
11764 mem = gen_rtx_MEM (mode, addr);
11765 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11769 else if (base == stack_pointer_rtx && m->fs.sp_realigned
11770 && cfa_offset >= m->fs.sp_realigned_offset)
11772 gcc_checking_assert (stack_realign_fp);
11773 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11776 /* The memory may not be relative to the current CFA register,
11777 which means that we may need to generate a new pattern for
11778 use by the unwind info. */
11779 else if (base != m->fs.cfa_reg)
11781 addr = plus_constant (Pmode, m->fs.cfa_reg,
11782 m->fs.cfa_offset - cfa_offset);
11783 mem = gen_rtx_MEM (mode, addr);
11784 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
11788 /* Emit code to save registers using MOV insns.
11789 First register is stored at CFA - CFA_OFFSET. */
11791 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
11793 unsigned int regno;
11795 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11796 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11798 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
11799 cfa_offset -= UNITS_PER_WORD;
11803 /* Emit code to save SSE registers using MOV insns.
11804 First register is stored at CFA - CFA_OFFSET. */
11806 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
11808 unsigned int regno;
11810 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11811 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11813 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
11814 cfa_offset -= GET_MODE_SIZE (V4SFmode);
11818 static GTY(()) rtx queued_cfa_restores;
11820 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
11821 manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
11822 Don't add the note if the previously saved value will be left untouched
11823 within the stack red-zone until return, as unwinders can find the same value
11824 in the register and on the stack. */
11827 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
11829 if (!crtl->shrink_wrapped
11830 && cfa_offset <= cfun->machine->fs.red_zone_offset)
11835 add_reg_note (insn, REG_CFA_RESTORE, reg);
11836 RTX_FRAME_RELATED_P (insn) = 1;
11839 queued_cfa_restores
11840 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
11843 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
11846 ix86_add_queued_cfa_restore_notes (rtx insn)
11849 if (!queued_cfa_restores)
11851 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
11853 XEXP (last, 1) = REG_NOTES (insn);
11854 REG_NOTES (insn) = queued_cfa_restores;
11855 queued_cfa_restores = NULL_RTX;
11856 RTX_FRAME_RELATED_P (insn) = 1;
11859 /* Expand prologue or epilogue stack adjustment.
11860 The pattern exists to put a dependency on all ebp-based memory accesses.
11861 STYLE should be negative if instructions should be marked as frame related,
11862 zero if the %r11 register is live and cannot be freely used, and positive otherwise.  */
11866 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
11867 int style, bool set_cfa)
11869 struct machine_function *m = cfun->machine;
11871 bool add_frame_related_expr = false;
11873 if (Pmode == SImode)
11874 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
11875 else if (x86_64_immediate_operand (offset, DImode))
11876 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
11880 /* r11 is used by indirect sibcall return as well, set before the
11881 epilogue and used after the epilogue. */
11883 tmp = gen_rtx_REG (DImode, R11_REG);
11886 gcc_assert (src != hard_frame_pointer_rtx
11887 && dest != hard_frame_pointer_rtx);
11888 tmp = hard_frame_pointer_rtx;
11890 insn = emit_insn (gen_rtx_SET (tmp, offset));
11892 add_frame_related_expr = true;
11894 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
11897 insn = emit_insn (insn);
11899 ix86_add_queued_cfa_restore_notes (insn);
11905 gcc_assert (m->fs.cfa_reg == src);
11906 m->fs.cfa_offset += INTVAL (offset);
11907 m->fs.cfa_reg = dest;
11909 r = gen_rtx_PLUS (Pmode, src, offset);
11910 r = gen_rtx_SET (dest, r);
11911 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
11912 RTX_FRAME_RELATED_P (insn) = 1;
11914 else if (style < 0)
11916 RTX_FRAME_RELATED_P (insn) = 1;
11917 if (add_frame_related_expr)
11919 rtx r = gen_rtx_PLUS (Pmode, src, offset);
11920 r = gen_rtx_SET (dest, r);
11921 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
11925 if (dest == stack_pointer_rtx)
11927 HOST_WIDE_INT ooffset = m->fs.sp_offset;
11928 bool valid = m->fs.sp_valid;
11929 bool realigned = m->fs.sp_realigned;
11931 if (src == hard_frame_pointer_rtx)
11933 valid = m->fs.fp_valid;
11935 ooffset = m->fs.fp_offset;
11937 else if (src == crtl->drap_reg)
11939 valid = m->fs.drap_valid;
11945 /* Else there are two possibilities: SP itself, which we set
11946 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
11947 taken care of by hand along the eh_return path. */
11948 gcc_checking_assert (src == stack_pointer_rtx
11949 || offset == const0_rtx);
11952 m->fs.sp_offset = ooffset - INTVAL (offset);
11953 m->fs.sp_valid = valid;
11954 m->fs.sp_realigned = realigned;
11959 /* Find an available register to be used as dynamic realign argument
11960 pointer register.  Such a register will be written in the prologue and
11961 used at the beginning of the body, so it must not be
11962 1. a parameter passing register.
11964 We reuse the static-chain register if it is available.  Otherwise, we
11965 use DI for i386 and R13 for x86-64.  We chose R13 since it has a longer encoding.
11968 Return: the regno of the chosen register. */
11970 static unsigned int
11971 find_drap_reg (void)
11973 tree decl = cfun->decl;
11975 /* Always use a callee-saved register if there are no caller-saved registers.  */
11979 /* Use R13 for a nested function or a function that needs a static chain.
11980 Since a function with a tail call may use any caller-saved
11981 register in the epilogue, DRAP must not use a caller-saved
11982 register in such a case. */
11983 if (DECL_STATIC_CHAIN (decl)
11984 || cfun->machine->no_caller_saved_registers
11985 || crtl->tail_call_emit)
11992 /* Use DI for a nested function or a function that needs a static chain.
11993 Since a function with a tail call may use any caller-saved
11994 register in the epilogue, DRAP must not use a caller-saved
11995 register in such a case. */
11996 if (DECL_STATIC_CHAIN (decl)
11997 || cfun->machine->no_caller_saved_registers
11998 || crtl->tail_call_emit)
12001 /* Reuse the static chain register if it isn't used for parameter passing.  */
12003 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
12005 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
12006 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
12013 /* Handle a "force_align_arg_pointer" attribute. */
12016 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
12017 tree, int, bool *no_add_attrs)
12019 if (TREE_CODE (*node) != FUNCTION_TYPE
12020 && TREE_CODE (*node) != METHOD_TYPE
12021 && TREE_CODE (*node) != FIELD_DECL
12022 && TREE_CODE (*node) != TYPE_DECL)
12024 warning (OPT_Wattributes, "%qE attribute only applies to functions",
12026 *no_add_attrs = true;
12032 /* Return minimum incoming stack alignment. */
12034 static unsigned int
12035 ix86_minimum_incoming_stack_boundary (bool sibcall)
12037 unsigned int incoming_stack_boundary;
12039 /* The stack of an interrupt handler is aligned to 128 bits in 64-bit mode.  */
12040 if (cfun->machine->func_type != TYPE_NORMAL)
12041 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
12042 /* Prefer the one specified at command line. */
12043 else if (ix86_user_incoming_stack_boundary)
12044 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
12045 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
12046 if -mstackrealign is used, this isn't used for the sibcall check, and
12047 the estimated stack alignment is 128-bit.  */
12049 && ix86_force_align_arg_pointer
12050 && crtl->stack_alignment_estimated == 128)
12051 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12053 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
12055 /* Incoming stack alignment can be changed on individual functions
12056 via force_align_arg_pointer attribute. We use the smallest
12057 incoming stack boundary. */
12058 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
12059 && lookup_attribute (ix86_force_align_arg_pointer_string,
12060 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
12061 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12063 /* The incoming stack frame has to be aligned at least at
12064 parm_stack_boundary. */
12065 if (incoming_stack_boundary < crtl->parm_stack_boundary)
12066 incoming_stack_boundary = crtl->parm_stack_boundary;
12068 /* The stack at the entry of main is aligned by the runtime.  We use the
12069 smallest incoming stack boundary. */
12070 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
12071 && DECL_NAME (current_function_decl)
12072 && MAIN_NAME_P (DECL_NAME (current_function_decl))
12073 && DECL_FILE_SCOPE_P (current_function_decl))
12074 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
12076 return incoming_stack_boundary;
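/* For example, under typical 64-bit defaults: a normal function with
   no -mincoming-stack-boundary option starts from the 128-bit default;
   an interrupt handler is given 128 directly; and main, or a function
   carrying the force_align_arg_pointer attribute, may then be lowered
   toward MAIN_STACK_BOUNDARY or MIN_STACK_BOUNDARY by the clamps
   above.  */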
12079 /* Update incoming stack boundary and estimated stack alignment. */
12082 ix86_update_stack_boundary (void)
12084 ix86_incoming_stack_boundary
12085 = ix86_minimum_incoming_stack_boundary (false);
12087 /* x86_64 varargs need 16-byte stack alignment for the register save area.  */
12090 && crtl->stack_alignment_estimated < 128)
12091 crtl->stack_alignment_estimated = 128;
12093 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
12094 if (ix86_tls_descriptor_calls_expanded_in_cfun
12095 && crtl->preferred_stack_boundary < 128)
12096 crtl->preferred_stack_boundary = 128;
12099 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
12100 needed or an rtx for DRAP otherwise. */
12103 ix86_get_drap_rtx (void)
12105 /* We must use DRAP if there are outgoing arguments on stack and
12106 ACCUMULATE_OUTGOING_ARGS is false. */
12107 if (ix86_force_drap
12108 || (cfun->machine->outgoing_args_on_stack
12109 && !ACCUMULATE_OUTGOING_ARGS))
12110 crtl->need_drap = true;
12112 if (stack_realign_drap)
12114 /* Assign DRAP to vDRAP and return vDRAP.  */
12115 unsigned int regno = find_drap_reg ();
12118 rtx_insn *seq, *insn;
12120 arg_ptr = gen_rtx_REG (Pmode, regno);
12121 crtl->drap_reg = arg_ptr;
12124 drap_vreg = copy_to_reg (arg_ptr);
12125 seq = get_insns ();
12128 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
12131 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
12132 RTX_FRAME_RELATED_P (insn) = 1;
12140 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
12143 ix86_internal_arg_pointer (void)
12145 return virtual_incoming_args_rtx;
12148 struct scratch_reg {
12153 /* Return a short-lived scratch register for use on function entry.
12154 In 32-bit mode, it is valid only after the registers are saved
12155 in the prologue. This register must be released by means of
12156 release_scratch_register_on_entry once it is dead. */
12159 get_scratch_register_on_entry (struct scratch_reg *sr)
12167 /* We always use R11 in 64-bit mode. */
12172 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12174 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12176 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12177 bool static_chain_p = DECL_STATIC_CHAIN (decl);
12178 int regparm = ix86_function_regparm (fntype, decl);
12180 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12182 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12183 for the static chain register. */
12184 if ((regparm < 1 || (fastcall_p && !static_chain_p))
12185 && drap_regno != AX_REG)
12187 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12188 for the static chain register. */
12189 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12191 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12193 /* ecx is the static chain register. */
12194 else if (regparm < 3 && !fastcall_p && !thiscall_p
12196 && drap_regno != CX_REG)
12198 else if (ix86_save_reg (BX_REG, true, false))
12200 /* esi is the static chain register. */
12201 else if (!(regparm == 3 && static_chain_p)
12202 && ix86_save_reg (SI_REG, true, false))
12204 else if (ix86_save_reg (DI_REG, true, false))
12208 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12213 sr->reg = gen_rtx_REG (Pmode, regno);
12216 rtx_insn *insn = emit_insn (gen_push (sr->reg));
12217 RTX_FRAME_RELATED_P (insn) = 1;
12221 /* Release a scratch register obtained from the preceding function.
12223 If RELEASE_VIA_POP is true, we just pop the register off the stack
12224 to release it. This is what non-Linux systems use with -fstack-check.
12226 Otherwise we use OFFSET to locate the saved register and the
12227 allocated stack space becomes part of the local frame and is
12228 deallocated by the epilogue. */
12231 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
12232 bool release_via_pop)
12236 if (release_via_pop)
12238 struct machine_function *m = cfun->machine;
12239 rtx x, insn = emit_insn (gen_pop (sr->reg));
12241 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
12242 RTX_FRAME_RELATED_P (insn) = 1;
12243 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12244 x = gen_rtx_SET (stack_pointer_rtx, x);
12245 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
12246 m->fs.sp_offset -= UNITS_PER_WORD;
12250 rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
12251 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
12257 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
12259 This differs from the next routine in that it tries hard to prevent
12260 attacks that jump the stack guard. Thus it is never allowed to allocate
12261 more than PROBE_INTERVAL bytes of stack space without a suitable
12264 INT_REGISTERS_SAVED is true if integer registers have already been
12265 pushed on the stack. */
12268 ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
12269 const bool int_registers_saved)
12271 struct machine_function *m = cfun->machine;
12273 /* If this function does not statically allocate stack space, then
12274 no probes are needed. */
12277 /* However, the allocation of space via pushes for register
12278 saves could be viewed as allocating space, but without the probe.  */
12280 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
12281 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12283 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
12287 /* If we are a noreturn function, then we have to consider the
12288 possibility that we're called via a jump rather than a call.
12290 Thus we don't have the implicit probe generated by saving the
12291 return address into the stack at the call. Thus, the stack
12292 pointer could be anywhere in the guard page. The safe thing
12293 to do is emit a probe now.
12295 The probe can be avoided if we have already emitted any callee
12296 register saves into the stack or have a frame pointer (which will
12297 have been saved as well). Those saves will function as implicit
12300 ?!? This should be revamped to work like aarch64 and s390 where
12301 we track the offset from the most recent probe. Normally that
12302 offset would be zero. For a noreturn function we would reset
12303 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
12304 we just probe when we cross PROBE_INTERVAL. */
12305 if (TREE_THIS_VOLATILE (cfun->decl)
12306 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
12308 /* We can safely use any register here since we're just going to push
12309 its value and immediately pop it back. But we do try and avoid
12310 argument passing registers so as not to introduce dependencies in
12311 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
12312 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
12313 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
12314 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
12315 m->fs.sp_offset -= UNITS_PER_WORD;
12316 if (m->fs.cfa_reg == stack_pointer_rtx)
12318 m->fs.cfa_offset -= UNITS_PER_WORD;
12319 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12320 x = gen_rtx_SET (stack_pointer_rtx, x);
12321 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
12322 RTX_FRAME_RELATED_P (insn_push) = 1;
12323 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12324 x = gen_rtx_SET (stack_pointer_rtx, x);
12325 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
12326 RTX_FRAME_RELATED_P (insn_pop) = 1;
12328 emit_insn (gen_blockage ());
12331 /* If we allocate less than the size of the guard statically,
12332 then no probing is necessary, but we do need to allocate the stack.  */
12334 if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)))
12336 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12337 GEN_INT (-size), -1,
12338 m->fs.cfa_reg == stack_pointer_rtx);
12339 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12343 /* We're allocating a large enough stack frame that we need to
12344 emit probes.  Either emit them inline or in a loop, depending on the size.  */
12346 HOST_WIDE_INT probe_interval = get_probe_interval ();
12347 if (size <= 4 * probe_interval)
12350 for (i = probe_interval; i <= size; i += probe_interval)
12352 /* Allocate PROBE_INTERVAL bytes. */
12354 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12355 GEN_INT (-probe_interval), -1,
12356 m->fs.cfa_reg == stack_pointer_rtx);
12357 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
12359 /* And probe at *sp. */
12360 emit_stack_probe (stack_pointer_rtx);
12361 emit_insn (gen_blockage ());
12364 /* We need to allocate space for the residual, but we do not need
12365 to probe the residual. */
12366 HOST_WIDE_INT residual = (i - probe_interval - size);
12368 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12369 GEN_INT (residual), -1,
12370 m->fs.cfa_reg == stack_pointer_rtx);
12371 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
12375 /* We expect the GP registers to be saved when probes are used
12376 as the probing sequences might need a scratch register and
12377 the routine to allocate one assumes the integer registers
12378 have already been saved. */
12379 gcc_assert (int_registers_saved);
12381 struct scratch_reg sr;
12382 get_scratch_register_on_entry (&sr);
12384 /* If we needed to save a register, then account for any space
12385 that was pushed (we are not going to pop the register when
12386 we do the restore). */
12388 size -= UNITS_PER_WORD;
12390 /* Step 1: round SIZE down to a multiple of the interval. */
12391 HOST_WIDE_INT rounded_size = size & -probe_interval;
12393 /* Step 2: compute the final value of the loop counter.  Use lea if possible.  */
12395 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
12397 if (address_no_seg_operand (addr, Pmode))
12398 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
12401 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12402 insn = emit_insn (gen_rtx_SET (sr.reg,
12403 gen_rtx_PLUS (Pmode, sr.reg,
12404 stack_pointer_rtx)));
12406 if (m->fs.cfa_reg == stack_pointer_rtx)
12408 add_reg_note (insn, REG_CFA_DEF_CFA,
12409 plus_constant (Pmode, sr.reg,
12410 m->fs.cfa_offset + rounded_size));
12411 RTX_FRAME_RELATED_P (insn) = 1;
12414 /* Step 3: the loop. */
12415 rtx size_rtx = GEN_INT (rounded_size);
12416 insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg,
12418 if (m->fs.cfa_reg == stack_pointer_rtx)
12420 m->fs.cfa_offset += rounded_size;
12421 add_reg_note (insn, REG_CFA_DEF_CFA,
12422 plus_constant (Pmode, stack_pointer_rtx,
12423 m->fs.cfa_offset));
12424 RTX_FRAME_RELATED_P (insn) = 1;
12426 m->fs.sp_offset += rounded_size;
12427 emit_insn (gen_blockage ());
12429 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
12430 is equal to ROUNDED_SIZE. */
12432 if (size != rounded_size)
12433 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12434 GEN_INT (rounded_size - size), -1,
12435 m->fs.cfa_reg == stack_pointer_rtx);
12436 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
12438 /* This does not deallocate the space reserved for the scratch
12439 register. That will be deallocated in the epilogue. */
12440 release_scratch_register_on_entry (&sr, size, false);
12443 /* Make sure nothing is scheduled before we are done. */
12444 emit_insn (gen_blockage ());
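/* Illustrative expansion, assuming a 4096-byte probe interval: a
   16384-byte frame takes the inline path above and emits four rounds
   of roughly

       sub     $4096, %rsp
       orq     $0, (%rsp)        ;; probe the newly allocated page

   while anything larger than four intervals goes through the loop
   form, using a scratch register to hold the loop bound.  */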
12447 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
12449 INT_REGISTERS_SAVED is true if integer registers have already been
12450 pushed on the stack. */
12453 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
12454 const bool int_registers_saved)
12456 /* We skip the probe for the first interval + a small dope of 4 words and
12457 probe that many bytes past the specified size to maintain a protection
12458 area at the bottom of the stack. */
12459 const int dope = 4 * UNITS_PER_WORD;
12460 rtx size_rtx = GEN_INT (size), last;
12462 /* See if we have a constant small number of probes to generate. If so,
12463 that's the easy case. The run-time loop is made up of 9 insns in the
12464 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12465 for n # of intervals. */
12466 if (size <= 4 * get_probe_interval ())
12468 HOST_WIDE_INT i, adjust;
12469 bool first_probe = true;
12471 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12472 values of N from 1 until it exceeds SIZE. If only one probe is
12473 needed, this will not generate any code. Then adjust and probe
12474 to PROBE_INTERVAL + SIZE. */
12475 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
12479 adjust = 2 * get_probe_interval () + dope;
12480 first_probe = false;
12483 adjust = get_probe_interval ();
12485 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12486 plus_constant (Pmode, stack_pointer_rtx,
12488 emit_stack_probe (stack_pointer_rtx);
12492 adjust = size + get_probe_interval () + dope;
12494 adjust = size + get_probe_interval () - i;
12496 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12497 plus_constant (Pmode, stack_pointer_rtx,
12499 emit_stack_probe (stack_pointer_rtx);
12501 /* Adjust back to account for the additional first interval. */
12502 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12503 plus_constant (Pmode, stack_pointer_rtx,
12504 (get_probe_interval ()
12508 /* Otherwise, do the same as above, but in a loop. Note that we must be
12509 extra careful with variables wrapping around because we might be at
12510 the very top (or the very bottom) of the address space and we have
12511 to be able to handle this case properly; in particular, we use an
12512 equality test for the loop condition. */
12515 /* We expect the GP registers to be saved when probes are used
12516 as the probing sequences might need a scratch register and
12517 the routine to allocate one assumes the integer registers
12518 have already been saved. */
12519 gcc_assert (int_registers_saved);
12521 HOST_WIDE_INT rounded_size;
12522 struct scratch_reg sr;
12524 get_scratch_register_on_entry (&sr);
12526 /* If we needed to save a register, then account for any space
12527 that was pushed (we are not going to pop the register when
12528 we do the restore). */
12530 size -= UNITS_PER_WORD;
12532 /* Step 1: round SIZE to the previous multiple of the interval. */
12534 rounded_size = ROUND_DOWN (size, get_probe_interval ());
12537 /* Step 2: compute initial and final value of the loop counter. */
12539 /* SP = SP_0 + PROBE_INTERVAL. */
12540 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12541 plus_constant (Pmode, stack_pointer_rtx,
12542 - (get_probe_interval () + dope))));
12544 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
12545 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
12546 emit_insn (gen_rtx_SET (sr.reg,
12547 plus_constant (Pmode, stack_pointer_rtx,
12551 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12552 emit_insn (gen_rtx_SET (sr.reg,
12553 gen_rtx_PLUS (Pmode, sr.reg,
12554 stack_pointer_rtx)));
12558 /* Step 3: the loop
12560 do
12561 {
12562 SP = SP + PROBE_INTERVAL
12563 probe at SP
12564 }
12565 while (SP != LAST_ADDR)
12567 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
12568 values of N from 1 until it is equal to ROUNDED_SIZE. */
12570 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
12573 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
12574 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
12576 if (size != rounded_size)
12578 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12579 plus_constant (Pmode, stack_pointer_rtx,
12580 rounded_size - size)));
12581 emit_stack_probe (stack_pointer_rtx);
12584 /* Adjust back to account for the additional first interval. */
12585 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12586 plus_constant (Pmode, stack_pointer_rtx,
12587 (get_probe_interval ()
12590 /* This does not deallocate the space reserved for the scratch
12591 register. That will be deallocated in the epilogue. */
12592 release_scratch_register_on_entry (&sr, size, false);
12595 /* Even if the stack pointer isn't the CFA register, we need to correctly
12596 describe the adjustments made to it, in particular differentiate the
12597 frame-related ones from the frame-unrelated ones. */
12600 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
12601 XVECEXP (expr, 0, 0)
12602 = gen_rtx_SET (stack_pointer_rtx,
12603 plus_constant (Pmode, stack_pointer_rtx, -size));
12604 XVECEXP (expr, 0, 1)
12605 = gen_rtx_SET (stack_pointer_rtx,
12606 plus_constant (Pmode, stack_pointer_rtx,
12607 get_probe_interval () + dope + size));
12608 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
12609 RTX_FRAME_RELATED_P (last) = 1;
12611 cfun->machine->fs.sp_offset += size;
12614 /* Make sure nothing is scheduled before we are done. */
12615 emit_insn (gen_blockage ());
12618 /* Adjust the stack pointer up to REG while probing it. */
12621 output_adjust_stack_and_probe (rtx reg)
12623 static int labelno = 0;
12627 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12630 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12632 /* SP = SP + PROBE_INTERVAL. */
12633 xops[0] = stack_pointer_rtx;
12634 xops[1] = GEN_INT (get_probe_interval ());
12635 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12638 xops[1] = const0_rtx;
12639 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
12641 /* Test if SP == LAST_ADDR. */
12642 xops[0] = stack_pointer_rtx;
12644 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12647 fputs ("\tjne\t", asm_out_file);
12648 assemble_name_raw (asm_out_file, loop_lab);
12649 fputc ('\n', asm_out_file);
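/* The emitted loop looks roughly like this in AT&T syntax, assuming a
   4096-byte interval and that the loop bound reached this code in
   %rax:

   .LPSRL0:
           sub     $4096, %rsp
           or      $0, (%rsp)
           cmp     %rax, %rsp
           jne     .LPSRL0
   */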
12654 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
12655 inclusive. These are offsets from the current stack pointer.
12657 INT_REGISTERS_SAVED is true if integer registers have already been
12658 pushed on the stack. */
12661 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
12662 const bool int_registers_saved)
12664 /* See if we have a constant small number of probes to generate. If so,
12665 that's the easy case. The run-time loop is made up of 6 insns in the
12666 generic case while the compile-time loop is made up of n insns for n # of probes.  */
12668 if (size <= 6 * get_probe_interval ())
12672 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
12673 it exceeds SIZE. If only one probe is needed, this will not
12674 generate any code. Then probe at FIRST + SIZE. */
12675 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
12676 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12679 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12683 /* Otherwise, do the same as above, but in a loop. Note that we must be
12684 extra careful with variables wrapping around because we might be at
12685 the very top (or the very bottom) of the address space and we have
12686 to be able to handle this case properly; in particular, we use an
12687 equality test for the loop condition. */
12690 /* We expect the GP registers to be saved when probes are used
12691 as the probing sequences might need a scratch register and
12692 the routine to allocate one assumes the integer registers
12693 have already been saved. */
12694 gcc_assert (int_registers_saved);
12696 HOST_WIDE_INT rounded_size, last;
12697 struct scratch_reg sr;
12699 get_scratch_register_on_entry (&sr);
12702 /* Step 1: round SIZE to the previous multiple of the interval. */
12704 rounded_size = ROUND_DOWN (size, get_probe_interval ());
12707 /* Step 2: compute initial and final value of the loop counter. */
12709 /* TEST_OFFSET = FIRST. */
12710 emit_move_insn (sr.reg, GEN_INT (-first));
12712 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
12713 last = first + rounded_size;
12716 /* Step 3: the loop
12718 do
12719 {
12720 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
12721 probe at TEST_ADDR
12722 }
12723 while (TEST_ADDR != LAST_ADDR)
12725 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
12726 until it is equal to ROUNDED_SIZE. */
12728 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
12731 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
12732 that SIZE is equal to ROUNDED_SIZE. */
12734 if (size != rounded_size)
12735 emit_stack_probe (plus_constant (Pmode,
12736 gen_rtx_PLUS (Pmode,
12739 rounded_size - size));
12741 release_scratch_register_on_entry (&sr, size, true);
12744 /* Make sure nothing is scheduled before we are done. */
12745 emit_insn (gen_blockage ());
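/* Worked example, with made-up numbers: probing FIRST = 0 and
   SIZE = 3 * 4096 with a 4096-byte interval stays on the inline path
   (at most six intervals) and emits standalone probes 4096, 8192 and
   12288 bytes below the stack pointer; only larger ranges use the
   scratch-register loop.  */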
12748 /* Probe a range of stack addresses from REG to END, inclusive. These are
12749 offsets from the current stack pointer. */
12752 output_probe_stack_range (rtx reg, rtx end)
12754 static int labelno = 0;
12758 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12761 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12763 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
12765 xops[1] = GEN_INT (get_probe_interval ());
12766 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12768 /* Probe at TEST_ADDR. */
12769 xops[0] = stack_pointer_rtx;
12771 xops[2] = const0_rtx;
12772 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
12774 /* Test if TEST_ADDR == LAST_ADDR. */
12777 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12780 fputs ("\tjne\t", asm_out_file);
12781 assemble_name_raw (asm_out_file, loop_lab);
12782 fputc ('\n', asm_out_file);
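/* The emitted loop therefore looks roughly like this (AT&T syntax; REG is
   the descending counter holding a negative offset from the stack pointer,
   END the final offset):
   .LPSRL0:
	sub	$PROBE_INTERVAL, REG
	or	$0, (%{e,r}sp,REG)
	cmp	END, REG
	jne	.LPSRL0
   This is only a sketch; the exact operands and suffixes come from the
   templates above.  */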
12787 /* Return true if stack frame is required. Update STACK_ALIGNMENT
12788 to the largest alignment, in bits, of stack slot used if stack
12789 frame is required and CHECK_STACK_SLOT is true. */
12792 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
12793 bool check_stack_slot)
12795 HARD_REG_SET set_up_by_prologue, prologue_used;
12798 CLEAR_HARD_REG_SET (prologue_used);
12799 CLEAR_HARD_REG_SET (set_up_by_prologue);
12800 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
12801 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
12802 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
12803 HARD_FRAME_POINTER_REGNUM);
12805 /* The preferred stack alignment is the minimum stack alignment. */
12806 if (stack_alignment > crtl->preferred_stack_boundary)
12807 stack_alignment = crtl->preferred_stack_boundary;
12809 bool require_stack_frame = false;
12811 FOR_EACH_BB_FN (bb, cfun)
12814 FOR_BB_INSNS (bb, insn)
12815 if (NONDEBUG_INSN_P (insn)
12816 && requires_stack_frame_p (insn, prologue_used,
12817 set_up_by_prologue))
12819 require_stack_frame = true;
12821 if (check_stack_slot)
12823 /* Find the maximum stack alignment. */
12824 subrtx_iterator::array_type array;
12825 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12827 && (reg_mentioned_p (stack_pointer_rtx,
12829 || reg_mentioned_p (frame_pointer_rtx,
12832 unsigned int alignment = MEM_ALIGN (*iter);
12833 if (alignment > stack_alignment)
12834 stack_alignment = alignment;
12840 return require_stack_frame;
12843 /* Finalize the stack_realign_needed and frame_pointer_needed flags, which
12844 guide generation of the prologue/epilogue in the correct form. */
12847 ix86_finalize_stack_frame_flags (void)
12849 /* Check if stack realignment is really needed after reload, and
12850 store the result in cfun. */
12851 unsigned int incoming_stack_boundary
12852 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
12853 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
12854 unsigned int stack_alignment
12855 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
12856 ? crtl->max_used_stack_slot_alignment
12857 : crtl->stack_alignment_needed);
12858 unsigned int stack_realign
12859 = (incoming_stack_boundary < stack_alignment);
12860 bool recompute_frame_layout_p = false;
12862 if (crtl->stack_realign_finalized)
12864 /* After stack_realign_needed is finalized, we can no longer
12866 gcc_assert (crtl->stack_realign_needed == stack_realign);
12870 /* If the only reason for frame_pointer_needed is that we conservatively
12871 assumed stack realignment might be needed or -fno-omit-frame-pointer
12872 is used, but in the end nothing that needed the stack alignment was
12873 spilled and the stack was not otherwise accessed, clear
12874 frame_pointer_needed and say we don't need stack realignment. */
12875 if ((stack_realign || (!flag_omit_frame_pointer && optimize))
12876 && frame_pointer_needed
12878 && crtl->sp_is_unchanging
12879 && !ix86_current_function_calls_tls_descriptor
12880 && !crtl->accesses_prior_frames
12881 && !cfun->calls_alloca
12882 && !crtl->calls_eh_return
12883 /* See ira_setup_eliminable_regset for the rationale. */
12884 && !(STACK_CHECK_MOVING_SP
12885 && flag_stack_check
12887 && cfun->can_throw_non_call_exceptions)
12888 && !ix86_frame_pointer_required ()
12889 && get_frame_size () == 0
12890 && ix86_nsaved_sseregs () == 0
12891 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
12893 if (ix86_find_max_used_stack_alignment (stack_alignment,
12896 /* A stack frame is required. If the stack alignment needed is less
12897 than the incoming stack boundary, don't realign the stack. */
12898 stack_realign = incoming_stack_boundary < stack_alignment;
12899 if (!stack_realign)
12901 crtl->max_used_stack_slot_alignment
12902 = incoming_stack_boundary;
12903 crtl->stack_alignment_needed
12904 = incoming_stack_boundary;
12905 /* Also update preferred_stack_boundary for leaf functions. */
12907 crtl->preferred_stack_boundary
12908 = incoming_stack_boundary;
12913 /* If drap has been set, but it actually isn't live at the
12914 start of the function, there is no reason to set it up. */
12915 if (crtl->drap_reg)
12917 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12918 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
12919 REGNO (crtl->drap_reg)))
12921 crtl->drap_reg = NULL_RTX;
12922 crtl->need_drap = false;
12926 cfun->machine->no_drap_save_restore = true;
12928 frame_pointer_needed = false;
12929 stack_realign = false;
12930 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
12931 crtl->stack_alignment_needed = incoming_stack_boundary;
12932 crtl->stack_alignment_estimated = incoming_stack_boundary;
12933 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
12934 crtl->preferred_stack_boundary = incoming_stack_boundary;
12935 df_finish_pass (true);
12936 df_scan_alloc (NULL);
12938 df_compute_regs_ever_live (true);
12941 if (flag_var_tracking)
12943 /* Since frame pointer is no longer available, replace it with
12944 stack pointer - UNITS_PER_WORD in debug insns. */
12946 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
12949 next = DF_REF_NEXT_REG (ref);
12950 if (!DF_REF_INSN_INFO (ref))
12953 /* Make sure the next ref is for a different instruction,
12954 so that we're not affected by the rescan. */
12955 rtx_insn *insn = DF_REF_INSN (ref);
12956 while (next && DF_REF_INSN (next) == insn)
12957 next = DF_REF_NEXT_REG (next);
12959 if (DEBUG_INSN_P (insn))
12961 bool changed = false;
12962 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
12964 rtx *loc = DF_REF_LOC (ref);
12965 if (*loc == hard_frame_pointer_rtx)
12967 *loc = plus_constant (Pmode,
12974 df_insn_rescan (insn);
12979 recompute_frame_layout_p = true;
12982 else if (crtl->max_used_stack_slot_alignment >= 128)
12984 /* We don't need to realign the stack. max_used_stack_alignment is
12985 used to decide how the stack frame should be aligned. This is
12986 independent of any psABI and of 32-bit vs 64-bit. It is always
12987 safe to compute max_used_stack_alignment. We compute it only
12988 if a 128-bit aligned load/store may be generated on a misaligned
12989 stack slot, which would lead to a segfault. */
12990 if (ix86_find_max_used_stack_alignment (stack_alignment, true))
12991 cfun->machine->max_used_stack_alignment
12992 = stack_alignment / BITS_PER_UNIT;
12995 if (crtl->stack_realign_needed != stack_realign)
12996 recompute_frame_layout_p = true;
12997 crtl->stack_realign_needed = stack_realign;
12998 crtl->stack_realign_finalized = true;
12999 if (recompute_frame_layout_p)
13000 ix86_compute_frame_layout ();
13003 /* Delete SET_GOT right after entry block if it is allocated to reg. */
13006 ix86_elim_entry_set_got (rtx reg)
13008 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
13009 rtx_insn *c_insn = BB_HEAD (bb);
13010 if (!NONDEBUG_INSN_P (c_insn))
13011 c_insn = next_nonnote_nondebug_insn (c_insn);
13012 if (c_insn && NONJUMP_INSN_P (c_insn))
13014 rtx pat = PATTERN (c_insn);
13015 if (GET_CODE (pat) == PARALLEL)
13017 rtx vec = XVECEXP (pat, 0, 0);
13018 if (GET_CODE (vec) == SET
13019 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
13020 && REGNO (XEXP (vec, 0)) == REGNO (reg))
13021 delete_insn (c_insn);
13027 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
13032 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
13033 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
13034 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
13038 gen_frame_load (rtx reg, rtx frame_reg, int offset)
13040 return gen_frame_set (reg, frame_reg, offset, false);
13044 gen_frame_store (rtx reg, rtx frame_reg, int offset)
13046 return gen_frame_set (reg, frame_reg, offset, true);
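/* For example, gen_frame_store (reg, frame_reg, 8) yields the RTL
   (set (mem (plus frame_reg (const_int 8))) reg), gen_frame_load produces
   the mirror image, and a zero OFFSET addresses FRAME_REG directly.  */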
13050 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
13052 struct machine_function *m = cfun->machine;
13053 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
13054 + m->call_ms2sysv_extra_regs;
13055 rtvec v = rtvec_alloc (ncregs + 1);
13056 unsigned int align, i, vi = 0;
13059 rtx rax = gen_rtx_REG (word_mode, AX_REG);
13060 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
13062 /* AL should only be live with sysv_abi. */
13063 gcc_assert (!ix86_eax_live_at_start_p ());
13064 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
13066 /* Set up RAX as the stub's base pointer. We use stack_realign_offset,
13067 which is correct whether we've actually realigned the stack or not. */
13068 align = GET_MODE_ALIGNMENT (V4SFmode);
13069 addr = choose_baseaddr (frame.stack_realign_offset
13070 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
13071 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
13073 emit_insn (gen_rtx_SET (rax, addr));
13075 /* Get the stub symbol. */
13076 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
13077 : XLOGUE_STUB_SAVE);
13078 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13080 for (i = 0; i < ncregs; ++i)
13082 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
13083 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
13085 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
13088 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
13090 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
13091 RTX_FRAME_RELATED_P (insn) = true;
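/* The PARALLEL built above is later matched by a pattern that emits a call
   to one of the out-of-line xlogue save stubs (names along the lines of
   __savms64*; the exact stub depends on frame pointer use and register
   count).  The individual frame stores merely describe to the unwinder
   what the stub will do.  */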
13094 /* Expand the prologue into a bunch of separate insns. */
13097 ix86_expand_prologue (void)
13099 struct machine_function *m = cfun->machine;
13101 HOST_WIDE_INT allocate;
13102 bool int_registers_saved;
13103 bool sse_registers_saved;
13104 bool save_stub_call_needed;
13105 rtx static_chain = NULL_RTX;
13107 if (ix86_function_naked (current_function_decl))
13110 ix86_finalize_stack_frame_flags ();
13112 /* DRAP should not coexist with stack_realign_fp */
13113 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
13115 memset (&m->fs, 0, sizeof (m->fs));
13117 /* Initialize CFA state for before the prologue. */
13118 m->fs.cfa_reg = stack_pointer_rtx;
13119 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
13121 /* Track SP offset to the CFA. We continue tracking this after we've
13122 swapped the CFA register away from SP. In the case of re-alignment
13123 this is fudged; we're interested in offsets within the local frame. */
13124 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
13125 m->fs.sp_valid = true;
13126 m->fs.sp_realigned = false;
13128 const struct ix86_frame &frame = cfun->machine->frame;
13130 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
13132 /* We should have already generated an error for any use of
13133 ms_hook on a nested function. */
13134 gcc_checking_assert (!ix86_static_chain_on_stack);
13136 /* Check if profiling is active and whether we shall use the
13137 profiling-before-prologue variant. If so, sorry. */
13138 if (crtl->profile && flag_fentry != 0)
13139 sorry ("ms_hook_prologue attribute isn%'t compatible "
13140 "with -mfentry for 32-bit");
13142 /* In ix86_asm_output_function_label we emitted:
13143 8b ff movl.s %edi,%edi
13145 8b ec movl.s %esp,%ebp
13147 This matches the hookable function prologue in Win32 API
13148 functions in Microsoft Windows XP Service Pack 2 and newer.
13149 Wine uses this to enable Windows apps to hook the Win32 API
13150 functions provided by Wine.
13152 What that means is that we've already set up the frame pointer. */
13154 if (frame_pointer_needed
13155 && !(crtl->drap_reg && crtl->stack_realign_needed))
13159 /* We've decided to use the frame pointer already set up.
13160 Describe this to the unwinder by pretending that both
13161 push and mov insns happen right here.
13163 Putting the unwind info here at the end of the ms_hook
13164 is done so that we can make absolutely certain we get
13165 the required byte sequence at the start of the function,
13166 rather than relying on an assembler that can produce
13167 the exact encoding required.
13169 However it does mean (in the unpatched case) that we have
13170 a 1 insn window where the asynchronous unwind info is
13171 incorrect. However, if we placed the unwind info at
13172 its correct location we would have incorrect unwind info
13173 in the patched case. Which is probably all moot since
13174 I don't expect Wine to generate dwarf2 unwind info for the
13175 system libraries that use this feature. */
13177 insn = emit_insn (gen_blockage ());
13179 push = gen_push (hard_frame_pointer_rtx);
13180 mov = gen_rtx_SET (hard_frame_pointer_rtx,
13181 stack_pointer_rtx);
13182 RTX_FRAME_RELATED_P (push) = 1;
13183 RTX_FRAME_RELATED_P (mov) = 1;
13185 RTX_FRAME_RELATED_P (insn) = 1;
13186 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13187 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
13189 /* Note that gen_push incremented m->fs.cfa_offset, even
13190 though we didn't emit the push insn here. */
13191 m->fs.cfa_reg = hard_frame_pointer_rtx;
13192 m->fs.fp_offset = m->fs.cfa_offset;
13193 m->fs.fp_valid = true;
13197 /* The frame pointer is not needed so pop %ebp again.
13198 This leaves us with a pristine state. */
13199 emit_insn (gen_pop (hard_frame_pointer_rtx));
13203 /* The first insn of a function that accepts its static chain on the
13204 stack is to push the register that would be filled in by a direct
13205 call. This insn will be skipped by the trampoline. */
13206 else if (ix86_static_chain_on_stack)
13208 static_chain = ix86_static_chain (cfun->decl, false);
13209 insn = emit_insn (gen_push (static_chain));
13210 emit_insn (gen_blockage ());
13212 /* We don't want to interpret this push insn as a register save,
13213 only as a stack adjustment. The real copy of the register as
13214 a save will be done later, if needed. */
13215 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
13216 t = gen_rtx_SET (stack_pointer_rtx, t);
13217 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
13218 RTX_FRAME_RELATED_P (insn) = 1;
13221 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
13222 DRAP is needed and stack realignment is really needed after reload. */
13223 if (stack_realign_drap)
13225 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13227 /* Can't use DRAP in interrupt function. */
13228 if (cfun->machine->func_type != TYPE_NORMAL)
13229 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
13230 "in interrupt service routine. This may be worked "
13231 "around by avoiding functions with aggregate return.");
13233 /* Only need to push parameter pointer reg if it is caller saved. */
13234 if (!call_used_regs[REGNO (crtl->drap_reg)])
13236 /* Push arg pointer reg */
13237 insn = emit_insn (gen_push (crtl->drap_reg));
13238 RTX_FRAME_RELATED_P (insn) = 1;
13241 /* Grab the argument pointer. */
13242 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
13243 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13244 RTX_FRAME_RELATED_P (insn) = 1;
13245 m->fs.cfa_reg = crtl->drap_reg;
13246 m->fs.cfa_offset = 0;
13248 /* Align the stack. */
13249 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13251 GEN_INT (-align_bytes)));
13252 RTX_FRAME_RELATED_P (insn) = 1;
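	  /* E.g., for a 32-byte alignment requirement this emits
	     "and $-32, %esp" (or %rsp), forcing the low bits clear.  */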
13254 /* Replicate the return address on the stack so that return
13255 address can be reached via (argp - 1) slot. This is needed
13256 to implement macro RETURN_ADDR_RTX and intrinsic function
13257 expand_builtin_return_addr etc. */
13258 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
13259 t = gen_frame_mem (word_mode, t);
13260 insn = emit_insn (gen_push (t));
13261 RTX_FRAME_RELATED_P (insn) = 1;
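	  /* Schematically, after this push the aligned frame contains:
		 DRAP                   -> incoming argument area
		 DRAP - UNITS_PER_WORD  -> copy of the return address
	     which is what makes the (argp - 1) slot work.  */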
13263 /* For the purposes of frame and register save area addressing,
13264 we've started over with a new frame. */
13265 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
13266 m->fs.realigned = true;
13270 /* Replicate static chain on the stack so that static chain
13271 can be reached via (argp - 2) slot. This is needed for
13272 nested function with stack realignment. */
13273 insn = emit_insn (gen_push (static_chain));
13274 RTX_FRAME_RELATED_P (insn) = 1;
13278 int_registers_saved = (frame.nregs == 0);
13279 sse_registers_saved = (frame.nsseregs == 0);
13280 save_stub_call_needed = (m->call_ms2sysv);
13281 gcc_assert (sse_registers_saved || !save_stub_call_needed);
13283 if (frame_pointer_needed && !m->fs.fp_valid)
13285 /* Note: AT&T enter does NOT have reversed args. Enter is probably
13286 slower on all targets. Also sdb didn't like it. */
13287 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
13288 RTX_FRAME_RELATED_P (insn) = 1;
13290 /* Push registers now, before setting the frame pointer
13292 if (!int_registers_saved
13294 && !frame.save_regs_using_mov)
13296 ix86_emit_save_regs ();
13297 int_registers_saved = true;
13298 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13301 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
13303 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
13304 RTX_FRAME_RELATED_P (insn) = 1;
13306 if (m->fs.cfa_reg == stack_pointer_rtx)
13307 m->fs.cfa_reg = hard_frame_pointer_rtx;
13308 m->fs.fp_offset = m->fs.sp_offset;
13309 m->fs.fp_valid = true;
13313 if (!int_registers_saved)
13315 /* If saving registers via PUSH, do so now. */
13316 if (!frame.save_regs_using_mov)
13318 ix86_emit_save_regs ();
13319 int_registers_saved = true;
13320 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13323 /* When using the red zone we may start register saving before allocating
13324 the stack frame, saving one cycle of the prologue. However, avoid
13325 doing this if we have to probe the stack; at least on x86_64 the
13326 stack probe can turn into a call that clobbers a red zone location. */
13327 else if (ix86_using_red_zone ()
13328 && (! TARGET_STACK_PROBE
13329 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
13331 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13332 int_registers_saved = true;
13336 if (stack_realign_fp)
13338 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13339 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
13341 /* Record last valid frame pointer offset. */
13342 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
13344 /* The computation of the size of the re-aligned stack frame means
13345 that we must allocate the size of the register save area before
13346 performing the actual alignment. Otherwise we cannot guarantee
13347 that there's enough storage above the realignment point. */
13348 allocate = frame.reg_save_offset - m->fs.sp_offset
13349 + frame.stack_realign_allocate;
13351 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13352 GEN_INT (-allocate), -1, false);
13354 /* Align the stack. */
13355 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13357 GEN_INT (-align_bytes)));
13358 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
13359 m->fs.sp_realigned_offset = m->fs.sp_offset
13360 - frame.stack_realign_allocate;
13361 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
13362 Beyond this point, stack access should be done via choose_baseaddr or
13363 by using sp_valid_at and fp_valid_at to determine the correct base
13364 register. Henceforth, any CFA offset should be thought of as logical
13365 and not physical. */
13366 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
13367 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
13368 m->fs.sp_realigned = true;
13370 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
13371 is needed to describe where a register is saved using a realigned
13372 stack pointer, so we need to invalidate the stack pointer for that
13375 m->fs.sp_valid = false;
13377 /* If SP offset is non-immediate after allocation of the stack frame,
13378 then emit SSE saves or stub call prior to allocating the rest of the
13379 stack frame. This is less efficient for the out-of-line stub because
13380 we can't combine allocations across the call barrier, but it's better
13381 than using a scratch register. */
13382 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
13383 - m->fs.sp_realigned_offset),
13386 if (!sse_registers_saved)
13388 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13389 sse_registers_saved = true;
13391 else if (save_stub_call_needed)
13393 ix86_emit_outlined_ms2sysv_save (frame);
13394 save_stub_call_needed = false;
13399 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
13401 if (flag_stack_usage_info)
13403 /* We start to count from ARG_POINTER. */
13404 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
13406 /* If it was realigned, take into account the fake frame. */
13407 if (stack_realign_drap)
13409 if (ix86_static_chain_on_stack)
13410 stack_size += UNITS_PER_WORD;
13412 if (!call_used_regs[REGNO (crtl->drap_reg)])
13413 stack_size += UNITS_PER_WORD;
13415 /* This over-estimates by 1 minimal-stack-alignment-unit but
13416 mitigates that by counting in the new return address slot. */
13417 current_function_dynamic_stack_size
13418 += crtl->stack_alignment_needed / BITS_PER_UNIT;
13421 current_function_static_stack_size = stack_size;
13424 /* On SEH target with very large frame size, allocate an area to save
13425 SSE registers (as the very large allocation won't be described). */
13427 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
13428 && !sse_registers_saved)
13430 HOST_WIDE_INT sse_size
13431 = frame.sse_reg_save_offset - frame.reg_save_offset;
13433 gcc_assert (int_registers_saved);
13435 /* No need to do stack checking as the area will be immediately
13437 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13438 GEN_INT (-sse_size), -1,
13439 m->fs.cfa_reg == stack_pointer_rtx);
13440 allocate -= sse_size;
13441 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13442 sse_registers_saved = true;
13445 /* The stack has already been decremented by the instruction calling us
13446 so probe if the size is non-negative to preserve the protection area. */
13448 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
13449 || flag_stack_clash_protection))
13451 if (flag_stack_clash_protection)
13453 ix86_adjust_stack_and_probe_stack_clash (allocate,
13454 int_registers_saved);
13457 else if (STACK_CHECK_MOVING_SP)
13459 if (!(crtl->is_leaf && !cfun->calls_alloca
13460 && allocate <= get_probe_interval ()))
13462 ix86_adjust_stack_and_probe (allocate, int_registers_saved);
13468 HOST_WIDE_INT size = allocate;
13470 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
13471 size = 0x80000000 - get_stack_check_protect () - 1;
13473 if (TARGET_STACK_PROBE)
13475 if (crtl->is_leaf && !cfun->calls_alloca)
13477 if (size > get_probe_interval ())
13478 ix86_emit_probe_stack_range (0, size, int_registers_saved);
13481 ix86_emit_probe_stack_range (0,
13482 size + get_stack_check_protect (),
13483 int_registers_saved);
13487 if (crtl->is_leaf && !cfun->calls_alloca)
13489 if (size > get_probe_interval ()
13490 && size > get_stack_check_protect ())
13491 ix86_emit_probe_stack_range (get_stack_check_protect (),
13493 - get_stack_check_protect ()),
13494 int_registers_saved);
13497 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
13498 int_registers_saved);
13505 else if (!ix86_target_stack_probe ()
13506 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
13508 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13509 GEN_INT (-allocate), -1,
13510 m->fs.cfa_reg == stack_pointer_rtx);
13514 rtx eax = gen_rtx_REG (Pmode, AX_REG);
13516 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
13517 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
13518 bool eax_live = ix86_eax_live_at_start_p ();
13519 bool r10_live = false;
13522 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
13526 insn = emit_insn (gen_push (eax));
13527 allocate -= UNITS_PER_WORD;
13528 /* Note that SEH directives need to continue tracking the stack
13529 pointer even after the frame pointer has been set up. */
13530 if (sp_is_cfa_reg || TARGET_SEH)
13533 m->fs.cfa_offset += UNITS_PER_WORD;
13534 RTX_FRAME_RELATED_P (insn) = 1;
13535 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13536 gen_rtx_SET (stack_pointer_rtx,
13537 plus_constant (Pmode, stack_pointer_rtx,
13538 -UNITS_PER_WORD)));
13544 r10 = gen_rtx_REG (Pmode, R10_REG);
13545 insn = emit_insn (gen_push (r10));
13546 allocate -= UNITS_PER_WORD;
13547 if (sp_is_cfa_reg || TARGET_SEH)
13550 m->fs.cfa_offset += UNITS_PER_WORD;
13551 RTX_FRAME_RELATED_P (insn) = 1;
13552 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13553 gen_rtx_SET (stack_pointer_rtx,
13554 plus_constant (Pmode, stack_pointer_rtx,
13555 -UNITS_PER_WORD)));
13559 emit_move_insn (eax, GEN_INT (allocate));
13560 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
13562 /* Use the fact that AX still contains ALLOCATE. */
13563 adjust_stack_insn = (Pmode == DImode
13564 ? gen_pro_epilogue_adjust_stack_di_sub
13565 : gen_pro_epilogue_adjust_stack_si_sub);
13567 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
13568 stack_pointer_rtx, eax));
13570 if (sp_is_cfa_reg || TARGET_SEH)
13573 m->fs.cfa_offset += allocate;
13574 RTX_FRAME_RELATED_P (insn) = 1;
13575 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13576 gen_rtx_SET (stack_pointer_rtx,
13577 plus_constant (Pmode, stack_pointer_rtx,
13580 m->fs.sp_offset += allocate;
13582 /* Use stack_pointer_rtx for relative addressing so that code
13583 works for realigned stack, too. */
13584 if (r10_live && eax_live)
13586 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13587 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13588 gen_frame_mem (word_mode, t));
13589 t = plus_constant (Pmode, t, UNITS_PER_WORD);
13590 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
13591 gen_frame_mem (word_mode, t));
13593 else if (eax_live || r10_live)
13595 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13596 emit_move_insn (gen_rtx_REG (word_mode,
13597 (eax_live ? AX_REG : R10_REG)),
13598 gen_frame_mem (word_mode, t));
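      /* Schematically, for a large allocation with EAX live this path emits
	 roughly (64-bit shown):
	     push  %rax                ; preserve caller's EAX
	     mov   $allocate, %rax
	     call  <stack worker>      ; a chkstk-style probing routine
	     sub   %rax, %rsp
	     mov   (%rsp,%rax), %rax   ; reload saved EAX from the old frame top
	 with an analogous save/restore of R10 when a static chain is live.
	 The worker name is target-specific; this is a sketch only.  */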
13601 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
13603 /* If we haven't already set up the frame pointer, do so now. */
13604 if (frame_pointer_needed && !m->fs.fp_valid)
13606 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
13607 GEN_INT (frame.stack_pointer_offset
13608 - frame.hard_frame_pointer_offset));
13609 insn = emit_insn (insn);
13610 RTX_FRAME_RELATED_P (insn) = 1;
13611 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
13613 if (m->fs.cfa_reg == stack_pointer_rtx)
13614 m->fs.cfa_reg = hard_frame_pointer_rtx;
13615 m->fs.fp_offset = frame.hard_frame_pointer_offset;
13616 m->fs.fp_valid = true;
13619 if (!int_registers_saved)
13620 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13621 if (!sse_registers_saved)
13622 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13623 else if (save_stub_call_needed)
13624 ix86_emit_outlined_ms2sysv_save (frame);
13626 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
13628 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
13630 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
13631 insn = emit_insn (gen_set_got (pic));
13632 RTX_FRAME_RELATED_P (insn) = 1;
13633 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
13634 emit_insn (gen_prologue_use (pic));
13635 /* Delete an already-emitted SET_GOT if it exists and is allocated to
13636 REAL_PIC_OFFSET_TABLE_REGNUM. */
13637 ix86_elim_entry_set_got (pic);
13640 if (crtl->drap_reg && !crtl->stack_realign_needed)
13642 /* vDRAP is set up, but after reload it turns out stack realignment
13643 isn't necessary; here we emit the prologue to set up DRAP
13644 without the stack realignment adjustment. */
13645 t = choose_baseaddr (0, NULL);
13646 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13649 /* Prevent instructions from being scheduled into register save push
13650 sequence when access to the redzone area is done through frame pointer.
13651 The offset between the frame pointer and the stack pointer is calculated
13652 relative to the value of the stack pointer at the end of the function
13653 prologue, and moving instructions that access redzone area via frame
13654 pointer inside push sequence violates this assumption. */
13655 if (frame_pointer_needed && frame.red_zone_size)
13656 emit_insn (gen_memory_blockage ());
13658 /* SEH requires that the prologue end within 256 bytes of the start of
13659 the function. Prevent instruction schedules that would extend that.
13660 Further, prevent alloca modifications to the stack pointer from being
13661 combined with prologue modifications. */
13663 emit_insn (gen_prologue_use (stack_pointer_rtx));
13666 /* Emit code to restore REG using a POP insn. */
13669 ix86_emit_restore_reg_using_pop (rtx reg)
13671 struct machine_function *m = cfun->machine;
13672 rtx_insn *insn = emit_insn (gen_pop (reg));
13674 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
13675 m->fs.sp_offset -= UNITS_PER_WORD;
13677 if (m->fs.cfa_reg == crtl->drap_reg
13678 && REGNO (reg) == REGNO (crtl->drap_reg))
13680 /* Previously we'd represented the CFA as an expression
13681 like *(%ebp - 8). We've just popped that value from
13682 the stack, which means we need to reset the CFA to
13683 the drap register. This will remain until we restore
13684 the stack pointer. */
13685 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13686 RTX_FRAME_RELATED_P (insn) = 1;
13688 /* This means that the DRAP register is valid for addressing too. */
13689 m->fs.drap_valid = true;
13693 if (m->fs.cfa_reg == stack_pointer_rtx)
13695 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13696 x = gen_rtx_SET (stack_pointer_rtx, x);
13697 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13698 RTX_FRAME_RELATED_P (insn) = 1;
13700 m->fs.cfa_offset -= UNITS_PER_WORD;
13703 /* When the frame pointer is the CFA, and we pop it, we are
13704 swapping back to the stack pointer as the CFA. This happens
13705 for stack frames that don't allocate other data, so we assume
13706 the stack pointer is now pointing at the return address, i.e.
13707 the function entry state, which makes the offset be 1 word. */
13708 if (reg == hard_frame_pointer_rtx)
13710 m->fs.fp_valid = false;
13711 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13713 m->fs.cfa_reg = stack_pointer_rtx;
13714 m->fs.cfa_offset -= UNITS_PER_WORD;
13716 add_reg_note (insn, REG_CFA_DEF_CFA,
13717 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13718 GEN_INT (m->fs.cfa_offset)));
13719 RTX_FRAME_RELATED_P (insn) = 1;
13724 /* Emit code to restore saved registers using POP insns. */
13727 ix86_emit_restore_regs_using_pop (void)
13729 unsigned int regno;
13731 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13732 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
13733 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
13736 /* Emit code and notes for the LEAVE instruction. If insn is non-null,
13737 omit the emit and only attach the notes. */
13740 ix86_emit_leave (rtx_insn *insn)
13742 struct machine_function *m = cfun->machine;
13744 insn = emit_insn (ix86_gen_leave ());
13746 ix86_add_queued_cfa_restore_notes (insn);
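  /* "leave" is equivalent to "mov %ebp, %esp" followed by "pop %ebp",
     which is why SP becomes valid again below at fp_offset - UNITS_PER_WORD
     while FP becomes invalid.  */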
13748 gcc_assert (m->fs.fp_valid);
13749 m->fs.sp_valid = true;
13750 m->fs.sp_realigned = false;
13751 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
13752 m->fs.fp_valid = false;
13754 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13756 m->fs.cfa_reg = stack_pointer_rtx;
13757 m->fs.cfa_offset = m->fs.sp_offset;
13759 add_reg_note (insn, REG_CFA_DEF_CFA,
13760 plus_constant (Pmode, stack_pointer_rtx,
13762 RTX_FRAME_RELATED_P (insn) = 1;
13764 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
13768 /* Emit code to restore saved registers using MOV insns.
13769 First register is restored from CFA - CFA_OFFSET. */
13771 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
13772 bool maybe_eh_return)
13774 struct machine_function *m = cfun->machine;
13775 unsigned int regno;
13777 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13778 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
13780 rtx reg = gen_rtx_REG (word_mode, regno);
13784 mem = choose_baseaddr (cfa_offset, NULL);
13785 mem = gen_frame_mem (word_mode, mem);
13786 insn = emit_move_insn (reg, mem);
13788 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
13790 /* Previously we'd represented the CFA as an expression
13791 like *(%ebp - 8). We've just restored that value from
13792 the stack, which means we need to reset the CFA to
13793 the drap register. This will remain until we restore
13794 the stack pointer. */
13795 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13796 RTX_FRAME_RELATED_P (insn) = 1;
13798 /* This means that the DRAP register is valid for addressing. */
13799 m->fs.drap_valid = true;
13802 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13804 cfa_offset -= UNITS_PER_WORD;
13808 /* Emit code to restore saved registers using MOV insns.
13809 First register is restored from CFA - CFA_OFFSET. */
13811 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
13812 bool maybe_eh_return)
13814 unsigned int regno;
13816 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13817 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
13819 rtx reg = gen_rtx_REG (V4SFmode, regno);
13821 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
13823 mem = choose_baseaddr (cfa_offset, &align);
13824 mem = gen_rtx_MEM (V4SFmode, mem);
13826 /* The location alignment depends upon the base register. */
13827 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
13828 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
13829 set_mem_align (mem, align);
13830 emit_insn (gen_rtx_SET (reg, mem));
13832 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13834 cfa_offset -= GET_MODE_SIZE (V4SFmode);
13839 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
13840 bool use_call, int style)
13842 struct machine_function *m = cfun->machine;
13843 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
13844 + m->call_ms2sysv_extra_regs;
13846 unsigned int elems_needed, align, i, vi = 0;
13849 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
13850 rtx r10 = NULL_RTX;
13851 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
13852 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
13853 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
13854 rtx rsi_frame_load = NULL_RTX;
13855 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
13856 enum xlogue_stub stub;
13858 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
13860 /* If using a realigned stack, we should never start with padding. */
13861 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
13863 /* Setup RSI as the stub's base pointer. */
13864 align = GET_MODE_ALIGNMENT (V4SFmode);
13865 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
13866 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
13868 emit_insn (gen_rtx_SET (rsi, tmp));
13870 /* Get a symbol for the stub. */
13871 if (frame_pointer_needed)
13872 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
13873 : XLOGUE_STUB_RESTORE_HFP_TAIL;
13875 stub = use_call ? XLOGUE_STUB_RESTORE
13876 : XLOGUE_STUB_RESTORE_TAIL;
13877 sym = xlogue.get_stub_rtx (stub);
13879 elems_needed = ncregs;
13883 elems_needed += frame_pointer_needed ? 5 : 3;
13884 v = rtvec_alloc (elems_needed);
13886 /* We call the epilogue stub when we need to pop incoming args or we are
13887 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
13888 epilogue stub and it will be the tail call. */
13890 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13893 RTVEC_ELT (v, vi++) = ret_rtx;
13894 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13895 if (frame_pointer_needed)
13897 rtx rbp = gen_rtx_REG (DImode, BP_REG);
13898 gcc_assert (m->fs.fp_valid);
13899 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
13901 tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
13902 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
13903 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
13904 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
13905 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
13909 /* If no hard frame pointer, we set R10 to the SP restore value. */
13910 gcc_assert (!m->fs.fp_valid);
13911 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13912 gcc_assert (m->fs.sp_valid);
13914 r10 = gen_rtx_REG (DImode, R10_REG);
13915 tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
13916 emit_insn (gen_rtx_SET (r10, tmp));
13918 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
13922 /* Generate frame load insns and restore notes. */
13923 for (i = 0; i < ncregs; ++i)
13925 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
13926 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
13927 rtx reg, frame_load;
13929 reg = gen_rtx_REG (mode, r.regno);
13930 frame_load = gen_frame_load (reg, rsi, r.offset);
13932 /* Save RSI frame load insn & note to add last. */
13933 if (r.regno == SI_REG)
13935 gcc_assert (!rsi_frame_load);
13936 rsi_frame_load = frame_load;
13937 rsi_restore_offset = r.offset;
13941 RTVEC_ELT (v, vi++) = frame_load;
13942 ix86_add_cfa_restore_note (NULL, reg, r.offset);
13946 /* Add RSI frame load & restore note at the end. */
13947 gcc_assert (rsi_frame_load);
13948 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
13949 RTVEC_ELT (v, vi++) = rsi_frame_load;
13950 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
13951 rsi_restore_offset);
13953 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
13954 if (!use_call && !frame_pointer_needed)
13956 gcc_assert (m->fs.sp_valid);
13957 gcc_assert (!m->fs.sp_realigned);
13959 /* At this point, R10 should point to frame.stack_realign_offset. */
13960 if (m->fs.cfa_reg == stack_pointer_rtx)
13961 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
13962 m->fs.sp_offset = frame.stack_realign_offset;
13965 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
13966 tmp = gen_rtx_PARALLEL (VOIDmode, v);
13968 insn = emit_insn (tmp);
13971 insn = emit_jump_insn (tmp);
13972 JUMP_LABEL (insn) = ret_rtx;
13974 if (frame_pointer_needed)
13975 ix86_emit_leave (insn);
13978 /* Need CFA adjust note. */
13979 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
13980 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
13984 RTX_FRAME_RELATED_P (insn) = true;
13985 ix86_add_queued_cfa_restore_notes (insn);
13987 /* If we're not doing a tail-call, we need to adjust the stack. */
13988 if (use_call && m->fs.sp_valid)
13990 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
13991 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13992 GEN_INT (dealloc), style,
13993 m->fs.cfa_reg == stack_pointer_rtx);
13997 /* Restore function stack, frame, and registers. */
14000 ix86_expand_epilogue (int style)
14002 struct machine_function *m = cfun->machine;
14003 struct machine_frame_state frame_state_save = m->fs;
14004 bool restore_regs_via_mov;
14006 bool restore_stub_is_tail = false;
14008 if (ix86_function_naked (current_function_decl))
14010 /* The program should not reach this point. */
14011 emit_insn (gen_ud2 ());
14015 ix86_finalize_stack_frame_flags ();
14016 const struct ix86_frame &frame = cfun->machine->frame;
14018 m->fs.sp_realigned = stack_realign_fp;
14019 m->fs.sp_valid = stack_realign_fp
14020 || !frame_pointer_needed
14021 || crtl->sp_is_unchanging;
14022 gcc_assert (!m->fs.sp_valid
14023 || m->fs.sp_offset == frame.stack_pointer_offset);
14025 /* The FP must be valid if the frame pointer is present. */
14026 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
14027 gcc_assert (!m->fs.fp_valid
14028 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
14030 /* We must have *some* valid pointer to the stack frame. */
14031 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
14033 /* The DRAP is never valid at this point. */
14034 gcc_assert (!m->fs.drap_valid);
14036 /* See the comment about red zone and frame
14037 pointer usage in ix86_expand_prologue. */
14038 if (frame_pointer_needed && frame.red_zone_size)
14039 emit_insn (gen_memory_blockage ());
14041 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
14042 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
14044 /* Determine the CFA offset of the end of the red-zone. */
14045 m->fs.red_zone_offset = 0;
14046 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
14048 /* The red zone begins below the return address (and, in an exception
14049 handler, below the error code). */
14050 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
14052 /* When the register save area is in the aligned portion of
14053 the stack, determine the maximum runtime displacement that
14054 matches up with the aligned frame. */
14055 if (stack_realign_drap)
14056 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
14060 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
14062 /* Special care must be taken for the normal return case of a function
14063 using eh_return: the eax and edx registers are marked as saved, but
14064 not restored along this path. Adjust the save location to match. */
14065 if (crtl->calls_eh_return && style != 2)
14066 reg_save_offset -= 2 * UNITS_PER_WORD;
14068 /* EH_RETURN requires the use of moves to function properly. */
14069 if (crtl->calls_eh_return)
14070 restore_regs_via_mov = true;
14071 /* SEH requires the use of pops to identify the epilogue. */
14072 else if (TARGET_SEH)
14073 restore_regs_via_mov = false;
14074 /* If we're only restoring one register and sp cannot be used, then
14075 use a move instruction to restore the register, since it's
14076 less work than reloading sp and popping the register. */
14077 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
14078 restore_regs_via_mov = true;
14079 else if (TARGET_EPILOGUE_USING_MOVE
14080 && cfun->machine->use_fast_prologue_epilogue
14081 && (frame.nregs > 1
14082 || m->fs.sp_offset != reg_save_offset))
14083 restore_regs_via_mov = true;
14084 else if (frame_pointer_needed
14086 && m->fs.sp_offset != reg_save_offset)
14087 restore_regs_via_mov = true;
14088 else if (frame_pointer_needed
14089 && TARGET_USE_LEAVE
14090 && cfun->machine->use_fast_prologue_epilogue
14091 && frame.nregs == 1)
14092 restore_regs_via_mov = true;
14094 restore_regs_via_mov = false;
14096 if (restore_regs_via_mov || frame.nsseregs)
14098 /* Ensure that the entire register save area is addressable via
14099 the stack pointer, if we will restore SSE regs via sp. */
14101 && m->fs.sp_offset > 0x7fffffff
14102 && sp_valid_at (frame.stack_realign_offset + 1)
14103 && (frame.nsseregs + frame.nregs) != 0)
14105 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14106 GEN_INT (m->fs.sp_offset
14107 - frame.sse_reg_save_offset),
14109 m->fs.cfa_reg == stack_pointer_rtx);
14113 /* If there are any SSE registers to restore, then we have to do it
14114 via moves, since there's obviously no pop for SSE regs. */
14115 if (frame.nsseregs)
14116 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
14119 if (m->call_ms2sysv)
14121 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
14123 /* We cannot use a tail-call for the stub if:
14124 1. We have to pop incoming args,
14125 2. We have additional int regs to restore,
14126 3. A sibling call will be the tail-call, or
14127 4. We are emitting an eh_return_internal epilogue.
14129 TODO: Item 4 has not yet been tested!
14131 If any of the above are true, we will call the stub rather than tail-call it. */
14133 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
14134 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
14137 /* If using out-of-line stub that is a tail-call, then...*/
14138 if (m->call_ms2sysv && restore_stub_is_tail)
14140 /* TODO: paranoid tests. (remove eventually) */
14141 gcc_assert (m->fs.sp_valid);
14142 gcc_assert (!m->fs.sp_realigned);
14143 gcc_assert (!m->fs.fp_valid);
14144 gcc_assert (!m->fs.realigned);
14145 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
14146 gcc_assert (!crtl->drap_reg);
14147 gcc_assert (!frame.nregs);
14149 else if (restore_regs_via_mov)
14154 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
14156 /* eh_return epilogues need %ecx added to the stack pointer. */
14159 rtx sa = EH_RETURN_STACKADJ_RTX;
14162 /* %ecx can't be used for both DRAP register and eh_return. */
14163 if (crtl->drap_reg)
14164 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
14166 /* regparm nested functions don't work with eh_return. */
14167 gcc_assert (!ix86_static_chain_on_stack);
14169 if (frame_pointer_needed)
14171 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
14172 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
14173 emit_insn (gen_rtx_SET (sa, t));
14175 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
14176 insn = emit_move_insn (hard_frame_pointer_rtx, t);
14178 /* Note that we use SA as a temporary CFA, as the return
14179 address is at the proper place relative to it. We
14180 pretend this happens at the FP restore insn because
14181 prior to this insn the FP would be stored at the wrong
14182 offset relative to SA, and after this insn we have no
14183 other reasonable register to use for the CFA. We don't
14184 bother resetting the CFA to the SP for the duration of
14185 the return insn, unless the control flow instrumentation
14186 is done. In this case the SP is used later and we have
14187 to reset CFA to SP. */
14188 add_reg_note (insn, REG_CFA_DEF_CFA,
14189 plus_constant (Pmode, sa, UNITS_PER_WORD));
14190 ix86_add_queued_cfa_restore_notes (insn);
14191 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
14192 RTX_FRAME_RELATED_P (insn) = 1;
14194 m->fs.cfa_reg = sa;
14195 m->fs.cfa_offset = UNITS_PER_WORD;
14196 m->fs.fp_valid = false;
14198 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
14200 flag_cf_protection);
14204 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
14205 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
14206 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
14207 ix86_add_queued_cfa_restore_notes (insn);
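	  /* I.e., the new stack pointer is SP + SA + (sp_offset -
	     UNITS_PER_WORD): the local frame is released and the EH
	     stack adjustment applied in a single addition.  */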
14209 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
14210 if (m->fs.cfa_offset != UNITS_PER_WORD)
14212 m->fs.cfa_offset = UNITS_PER_WORD;
14213 add_reg_note (insn, REG_CFA_DEF_CFA,
14214 plus_constant (Pmode, stack_pointer_rtx,
14216 RTX_FRAME_RELATED_P (insn) = 1;
14219 m->fs.sp_offset = UNITS_PER_WORD;
14220 m->fs.sp_valid = true;
14221 m->fs.sp_realigned = false;
14226 /* SEH requires that the function end with (1) a stack adjustment
14227 if necessary, (2) a sequence of pops, and (3) a return or
14228 jump instruction. Prevent insns from the function body from
14229 being scheduled into this sequence. */
14232 /* Prevent a catch region from being adjacent to the standard
14233 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
14234 nor several other flags that would be interesting to test are
14236 if (flag_non_call_exceptions)
14237 emit_insn (gen_nops (const1_rtx));
14239 emit_insn (gen_blockage ());
14242 /* First step is to deallocate the stack frame so that we can
14243 pop the registers. If the stack pointer was realigned, it needs
14244 to be restored now. Also do it on SEH targets for very large
14245 frames, as the emitted instructions aren't allowed by the ABI
14247 if (!m->fs.sp_valid || m->fs.sp_realigned
14249 && (m->fs.sp_offset - reg_save_offset
14250 >= SEH_MAX_FRAME_SIZE)))
14252 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
14253 GEN_INT (m->fs.fp_offset
14254 - reg_save_offset),
14257 else if (m->fs.sp_offset != reg_save_offset)
14259 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14260 GEN_INT (m->fs.sp_offset
14261 - reg_save_offset),
14263 m->fs.cfa_reg == stack_pointer_rtx);
14266 ix86_emit_restore_regs_using_pop ();
14269 /* If we used a frame pointer and haven't already got rid of it,
14271 if (m->fs.fp_valid)
14273 /* If the stack pointer is valid and pointing at the frame
14274 pointer store address, then we only need a pop. */
14275 if (sp_valid_at (frame.hfp_save_offset)
14276 && m->fs.sp_offset == frame.hfp_save_offset)
14277 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14278 /* Leave results in shorter dependency chains on CPUs that are
14279 able to grok it fast. */
14280 else if (TARGET_USE_LEAVE
14281 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
14282 || !cfun->machine->use_fast_prologue_epilogue)
14283 ix86_emit_leave (NULL);
14286 pro_epilogue_adjust_stack (stack_pointer_rtx,
14287 hard_frame_pointer_rtx,
14288 const0_rtx, style, !using_drap);
14289 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14295 int param_ptr_offset = UNITS_PER_WORD;
14298 gcc_assert (stack_realign_drap);
14300 if (ix86_static_chain_on_stack)
14301 param_ptr_offset += UNITS_PER_WORD;
14302 if (!call_used_regs[REGNO (crtl->drap_reg)])
14303 param_ptr_offset += UNITS_PER_WORD;
14305 insn = emit_insn (gen_rtx_SET
14306 (stack_pointer_rtx,
14307 gen_rtx_PLUS (Pmode,
14309 GEN_INT (-param_ptr_offset))));
14310 m->fs.cfa_reg = stack_pointer_rtx;
14311 m->fs.cfa_offset = param_ptr_offset;
14312 m->fs.sp_offset = param_ptr_offset;
14313 m->fs.realigned = false;
14315 add_reg_note (insn, REG_CFA_DEF_CFA,
14316 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14317 GEN_INT (param_ptr_offset)));
14318 RTX_FRAME_RELATED_P (insn) = 1;
14320 if (!call_used_regs[REGNO (crtl->drap_reg)])
14321 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
14324 /* At this point the stack pointer must be valid, and we must have
14325 restored all of the registers. We may not have deallocated the
14326 entire stack frame. We've delayed this until now because it may
14327 be possible to merge the local stack deallocation with the
14328 deallocation forced by ix86_static_chain_on_stack. */
14329 gcc_assert (m->fs.sp_valid);
14330 gcc_assert (!m->fs.sp_realigned);
14331 gcc_assert (!m->fs.fp_valid);
14332 gcc_assert (!m->fs.realigned);
14333 if (m->fs.sp_offset != UNITS_PER_WORD)
14335 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14336 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
14340 ix86_add_queued_cfa_restore_notes (get_last_insn ());
14342 /* Sibcall epilogues don't want a return instruction. */
14345 m->fs = frame_state_save;
14349 if (cfun->machine->func_type != TYPE_NORMAL)
14350 emit_jump_insn (gen_interrupt_return ());
14351 else if (crtl->args.pops_args && crtl->args.size)
14353 rtx popc = GEN_INT (crtl->args.pops_args);
14355 /* i386 can only pop 64K bytes. If asked to pop more, pop return
14356 address, do explicit add, and jump indirectly to the caller. */
14358 if (crtl->args.pops_args >= 65536)
14360 rtx ecx = gen_rtx_REG (SImode, CX_REG);
14363 /* There is no "pascal" calling convention in any 64bit ABI. */
14364 gcc_assert (!TARGET_64BIT);
14366 insn = emit_insn (gen_pop (ecx));
14367 m->fs.cfa_offset -= UNITS_PER_WORD;
14368 m->fs.sp_offset -= UNITS_PER_WORD;
14370 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14371 x = gen_rtx_SET (stack_pointer_rtx, x);
14372 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14373 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
14374 RTX_FRAME_RELATED_P (insn) = 1;
14376 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14378 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
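	  /* The emitted epilogue tail is thus, schematically:
		pop	%ecx		; return address
		add	$popc, %esp	; release the argument area
		jmp	*%ecx		; return to the caller
	     in place of the usual "ret $popc".  */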
14381 emit_jump_insn (gen_simple_return_pop_internal (popc));
14383 else if (!m->call_ms2sysv || !restore_stub_is_tail)
14385 /* In case of return from EH a simple return cannot be used
14386 as a return address will be compared with a shadow stack
14387 return address. Use indirect jump instead. */
14388 if (style == 2 && flag_cf_protection)
14390 /* Register used in indirect jump must be in word_mode. But
14391 Pmode may not be the same as word_mode for x32. */
14392 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
14395 insn = emit_insn (gen_pop (ecx));
14396 m->fs.cfa_offset -= UNITS_PER_WORD;
14397 m->fs.sp_offset -= UNITS_PER_WORD;
14399 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14400 x = gen_rtx_SET (stack_pointer_rtx, x);
14401 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14402 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
14403 RTX_FRAME_RELATED_P (insn) = 1;
14405 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
14408 emit_jump_insn (gen_simple_return_internal ());
14411 /* Restore the state back to the state from the prologue,
14412 so that it's correct for the next epilogue. */
14413 m->fs = frame_state_save;
14416 /* Reset from the function's potential modifications. */
14419 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
14421 if (pic_offset_table_rtx
14422 && !ix86_use_pseudo_pic_reg ())
14423 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
14427 rtx_insn *insn = get_last_insn ();
14428 rtx_insn *deleted_debug_label = NULL;
14430 /* Mach-O doesn't support labels at the end of objects, so if
14431 it looks like we might want one, take special action.
14432 First, collect any sequence of deleted debug labels. */
14435 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
14437 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
14438 notes only, instead set their CODE_LABEL_NUMBER to -1,
14439 otherwise there would be code generation differences
14440 in between -g and -g0. */
14441 if (NOTE_P (insn) && NOTE_KIND (insn)
14442 == NOTE_INSN_DELETED_DEBUG_LABEL)
14443 deleted_debug_label = insn;
14444 insn = PREV_INSN (insn);
14450 then this needs to be detected, so skip past the barrier. */
14452 if (insn && BARRIER_P (insn))
14453 insn = PREV_INSN (insn);
14455 /* Up to now we've only seen notes or barriers. */
14460 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
14461 /* Trailing label. */
14462 fputs ("\tnop\n", file);
14463 else if (cfun && ! cfun->is_thunk)
14465 /* See if we have a completely empty function body, skipping
14466 the special case of the picbase thunk emitted as asm. */
14467 while (insn && ! INSN_P (insn))
14468 insn = PREV_INSN (insn);
14469 /* If we don't find any insns, we've got an empty function body;
14470 i.e. completely empty - without a return or branch. This is
14471 taken as the case where a function body has been removed
14472 because it contains an inline __builtin_unreachable(). GCC
14473 declares that reaching __builtin_unreachable() means UB so
14474 we're not obliged to do anything special; however, we want
14475 non-zero-sized function bodies. To meet this, and help the
14476 user out, let's trap the case. */
14478 fputs ("\tud2\n", file);
14481 else if (deleted_debug_label)
14482 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
14483 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
14484 CODE_LABEL_NUMBER (insn) = -1;
14488 /* Return a scratch register to use in the split stack prologue. The
14489 split stack prologue is used for -fsplit-stack. It is the first
14490 instructions in the function, even before the regular prologue.
14491 The scratch register can be any caller-saved register which is not
14492 used for parameters or for the static chain. */
14494 static unsigned int
14495 split_stack_prologue_scratch_regno (void)
14501 bool is_fastcall, is_thiscall;
14504 is_fastcall = (lookup_attribute ("fastcall",
14505 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14507 is_thiscall = (lookup_attribute ("thiscall",
14508 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14510 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
14514 if (DECL_STATIC_CHAIN (cfun->decl))
14516 sorry ("-fsplit-stack does not support fastcall with "
14517 "nested function");
14518 return INVALID_REGNUM;
14522 else if (is_thiscall)
14524 if (!DECL_STATIC_CHAIN (cfun->decl))
14528 else if (regparm < 3)
14530 if (!DECL_STATIC_CHAIN (cfun->decl))
14536 sorry ("-fsplit-stack does not support 2 register "
14537 "parameters for a nested function");
14538 return INVALID_REGNUM;
14545 /* FIXME: We could make this work by pushing a register
14546 around the addition and comparison. */
14547 sorry ("-fsplit-stack does not support 3 register parameters");
14548 return INVALID_REGNUM;
14553 /* A SYMBOL_REF for the function which allocates new stack space for split stack. */
14556 static GTY(()) rtx split_stack_fn;
14558 /* A SYMBOL_REF for the more-stack function when using the large code model. */
14561 static GTY(()) rtx split_stack_fn_large;
14563 /* Return location of the stack guard value in the TLS block. */
14566 ix86_split_stack_guard (void)
14569 addr_space_t as = DEFAULT_TLS_SEG_REG;
14572 gcc_assert (flag_split_stack);
14574 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14575 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14577 gcc_unreachable ();
14580 r = GEN_INT (offset);
14581 r = gen_const_mem (Pmode, r);
14582 set_mem_addr_space (r, as);
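/* (For example, on x86-64 GNU/Linux TARGET_THREAD_SPLIT_STACK_OFFSET
is 0x70, so the guard becomes the TLS-relative memory operand
%fs:0x70 in the comparison emitted below; the offset and segment
register vary by target.) */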
14587 /* Handle -fsplit-stack. These are the first instructions in the
14588 function, even before the regular prologue. */
14591 ix86_expand_split_stack_prologue (void)
14593 HOST_WIDE_INT allocate;
14594 unsigned HOST_WIDE_INT args_size;
14595 rtx_code_label *label;
14596 rtx limit, current, allocate_rtx, call_fusage;
14597 rtx_insn *call_insn;
14598 rtx scratch_reg = NULL_RTX;
14599 rtx_code_label *varargs_label = NULL;
14602 gcc_assert (flag_split_stack && reload_completed);
14604 ix86_finalize_stack_frame_flags ();
14605 struct ix86_frame &frame = cfun->machine->frame;
14606 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
14608 /* This is the label we will branch to if we have enough stack
14609 space. We expect the basic block reordering pass to reverse this
14610 branch if optimizing, so that we branch in the unlikely case. */
14611 label = gen_label_rtx ();
14613 /* We need to compare the stack pointer minus the frame size with
14614 the stack boundary in the TCB. The stack boundary always gives
14615 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
14616 can compare directly. Otherwise we need to do an addition. */
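/* (In other words, the test is sp - allocate >= guard.
SPLIT_STACK_AVAILABLE is a fixed amount of slack known to exist
below the recorded boundary, so frames needing less than that can
compare the stack pointer itself and skip materializing
sp - allocate in a scratch register.) */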
14618 limit = ix86_split_stack_guard ();
14620 if (allocate < SPLIT_STACK_AVAILABLE)
14621 current = stack_pointer_rtx;
14624 unsigned int scratch_regno;
14627 /* We need a scratch register to hold the stack pointer minus
14628 the required frame size. Since this is the very start of the
14629 function, the scratch register can be any caller-saved
14630 register which is not used for parameters. */
14631 offset = GEN_INT (- allocate);
14632 scratch_regno = split_stack_prologue_scratch_regno ();
14633 if (scratch_regno == INVALID_REGNUM)
14635 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
14636 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
14638 /* We don't use ix86_gen_add3 in this case because it will
14639 want to split to lea, but when not optimizing the insn
14640 will not be split after this point. */
14641 emit_insn (gen_rtx_SET (scratch_reg,
14642 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14647 emit_move_insn (scratch_reg, offset);
14648 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
14649 stack_pointer_rtx));
14651 current = scratch_reg;
14654 ix86_expand_branch (GEU, current, limit, label);
14655 rtx_insn *jump_insn = get_last_insn ();
14656 JUMP_LABEL (jump_insn) = label;
14658 /* Mark the jump as very likely to be taken. */
14659 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
14661 if (split_stack_fn == NULL_RTX)
14663 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
14664 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
14666 fn = split_stack_fn;
14668 /* Get more stack space. We pass in the desired stack space and the
14669 size of the arguments to copy to the new stack. In 32-bit mode
14670 we push the parameters; __morestack will return on a new stack
14671 anyhow. In 64-bit mode we pass the parameters in r10 and
14672 r11. */
14673 allocate_rtx = GEN_INT (allocate);
14674 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
14675 call_fusage = NULL_RTX;
14676 rtx pop = NULL_RTX;
14681 reg10 = gen_rtx_REG (Pmode, R10_REG);
14682 reg11 = gen_rtx_REG (Pmode, R11_REG);
14684 /* If this function uses a static chain, it will be in %r10.
14685 Preserve it across the call to __morestack. */
14686 if (DECL_STATIC_CHAIN (cfun->decl))
14690 rax = gen_rtx_REG (word_mode, AX_REG);
14691 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
14692 use_reg (&call_fusage, rax);
14695 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
14698 HOST_WIDE_INT argval;
14700 gcc_assert (Pmode == DImode);
14701 /* When using the large model we need to load the address
14702 into a register, and we've run out of registers. So we
14703 switch to a different calling convention, and we call a
14704 different function: __morestack_large_model. We pass the
14705 argument size in the upper 32 bits of r10 and pass the
14706 frame size in the lower 32 bits. */
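/* (Worked example of the packing done below: allocate == 0x120 and
args_size == 0x10 yield r10 == 0x0000001000000120, i.e.
((args_size << 16) << 16) + allocate.) */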
14707 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
14708 gcc_assert ((args_size & 0xffffffff) == args_size);
14710 if (split_stack_fn_large == NULL_RTX)
14712 split_stack_fn_large
14713 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
14714 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
14716 if (ix86_cmodel == CM_LARGE_PIC)
14718 rtx_code_label *label;
14721 label = gen_label_rtx ();
14722 emit_label (label);
14723 LABEL_PRESERVE_P (label) = 1;
14724 emit_insn (gen_set_rip_rex64 (reg10, label));
14725 emit_insn (gen_set_got_offset_rex64 (reg11, label));
14726 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
14727 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
14729 x = gen_rtx_CONST (Pmode, x);
14730 emit_move_insn (reg11, x);
14731 x = gen_rtx_PLUS (Pmode, reg10, reg11);
14732 x = gen_const_mem (Pmode, x);
14733 emit_move_insn (reg11, x);
14736 emit_move_insn (reg11, split_stack_fn_large);
14740 argval = ((args_size << 16) << 16) + allocate;
14741 emit_move_insn (reg10, GEN_INT (argval));
14745 emit_move_insn (reg10, allocate_rtx);
14746 emit_move_insn (reg11, GEN_INT (args_size));
14747 use_reg (&call_fusage, reg11);
14750 use_reg (&call_fusage, reg10);
14754 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
14755 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
14756 insn = emit_insn (gen_push (allocate_rtx));
14757 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
14758 pop = GEN_INT (2 * UNITS_PER_WORD);
14760 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
14761 GEN_INT (UNITS_PER_WORD), constm1_rtx,
14763 add_function_usage_to (call_insn, call_fusage);
14765 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
14766 /* Indicate that this function can't jump to non-local gotos. */
14767 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
14769 /* In order to make call/return prediction work right, we now need
14770 to execute a return instruction. See
14771 libgcc/config/i386/morestack.S for the details on how this works.
14773 For flow purposes gcc must not see this as a return
14774 instruction--we need control flow to continue at the subsequent
14775 label. Therefore, we use an unspec. */
14776 gcc_assert (crtl->args.pops_args < 65536);
14777 rtx_insn *ret_insn
14778 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
14780 if ((flag_cf_protection & CF_BRANCH))
14782 /* Insert ENDBR since __morestack will jump back here via indirect
14783 call. */
14784 rtx cet_eb = gen_nop_endbr ();
14785 emit_insn_after (cet_eb, ret_insn);
14788 /* If we are in 64-bit mode and this function uses a static chain,
14789 we saved %r10 in %rax before calling __morestack. */
14790 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
14791 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
14792 gen_rtx_REG (word_mode, AX_REG));
14794 /* If this function calls va_start, we need to store a pointer to
14795 the arguments on the old stack, because they may not have been
14796 all copied to the new stack. At this point the old stack can be
14797 found at the frame pointer value used by __morestack, because
14798 __morestack has set that up before calling back to us. Here we
14799 store that pointer in a scratch register, and in
14800 ix86_expand_prologue we store the scratch register in a stack
14801 slot. */
14802 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14804 unsigned int scratch_regno;
14808 scratch_regno = split_stack_prologue_scratch_regno ();
14809 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
14810 frame_reg = gen_rtx_REG (Pmode, BP_REG);
14812 /* 64-bit:
14813 fp -> old fp
14814 return address within this function
14815 return address of caller of this function
14816 stack arguments
14817 So we add three words to get to the stack arguments.
14819 32-bit:
14820 fp -> old fp
14821 return address within this function
14822 first argument to __morestack
14823 second argument to __morestack
14824 return address of caller of this function
14825 stack arguments
14826 So we add five words to get to the stack arguments. */
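/* (So with 4-byte words on ia32 the scratch register is set to the
frame pointer plus 20; with 8-byte words on x86-64 it is the frame
pointer plus 24.) */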
14828 words = TARGET_64BIT ? 3 : 5;
14829 emit_insn (gen_rtx_SET (scratch_reg,
14830 gen_rtx_PLUS (Pmode, frame_reg,
14831 GEN_INT (words * UNITS_PER_WORD))));
14833 varargs_label = gen_label_rtx ();
14834 emit_jump_insn (gen_jump (varargs_label));
14835 JUMP_LABEL (get_last_insn ()) = varargs_label;
14840 emit_label (label);
14841 LABEL_NUSES (label) = 1;
14843 /* If this function calls va_start, we now have to set the scratch
14844 register for the case where we do not call __morestack. In this
14845 case we need to set it based on the stack pointer. */
14846 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14848 emit_insn (gen_rtx_SET (scratch_reg,
14849 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14850 GEN_INT (UNITS_PER_WORD))));
14852 emit_label (varargs_label);
14853 LABEL_NUSES (varargs_label) = 1;
14857 /* We may have to tell the dataflow pass that the split stack prologue
14858 is initializing a scratch register. */
14861 ix86_live_on_entry (bitmap regs)
14863 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14865 gcc_assert (flag_split_stack);
14866 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
14870 /* Extract the parts of an RTL expression that is a valid memory address
14871 for an instruction. Return 0 if the structure of the address is
14872 grossly off. Return -1 if the address contains ASHIFT, so it is not
14873 strictly valid, but is still used for computing the length of an lea instruction. */
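/* (Illustrative decomposition: the address
(plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12))
fills in base = %ebx, index = %eax, scale = 4, disp = 12,
corresponding to the operand 12(%ebx,%eax,4).) */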
14876 ix86_decompose_address (rtx addr, struct ix86_address *out)
14878 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
14879 rtx base_reg, index_reg;
14880 HOST_WIDE_INT scale = 1;
14881 rtx scale_rtx = NULL_RTX;
14884 addr_space_t seg = ADDR_SPACE_GENERIC;
14886 /* Allow zero-extended SImode addresses;
14887 they will be emitted with the addr32 prefix. */
14888 if (TARGET_64BIT && GET_MODE (addr) == DImode)
14890 if (GET_CODE (addr) == ZERO_EXTEND
14891 && GET_MODE (XEXP (addr, 0)) == SImode)
14893 addr = XEXP (addr, 0);
14894 if (CONST_INT_P (addr))
14897 else if (GET_CODE (addr) == AND
14898 && const_32bit_mask (XEXP (addr, 1), DImode))
14900 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
14901 if (addr == NULL_RTX)
14904 if (CONST_INT_P (addr))
14909 /* Allow SImode subregs of DImode addresses;
14910 they will be emitted with the addr32 prefix. */
14911 if (TARGET_64BIT && GET_MODE (addr) == SImode)
14913 if (SUBREG_P (addr)
14914 && GET_MODE (SUBREG_REG (addr)) == DImode)
14916 addr = SUBREG_REG (addr);
14917 if (CONST_INT_P (addr))
14924 else if (SUBREG_P (addr))
14926 if (REG_P (SUBREG_REG (addr)))
14931 else if (GET_CODE (addr) == PLUS)
14933 rtx addends[4], op;
14941 addends[n++] = XEXP (op, 1);
14944 while (GET_CODE (op) == PLUS);
14949 for (i = n; i >= 0; --i)
14952 switch (GET_CODE (op))
14957 index = XEXP (op, 0);
14958 scale_rtx = XEXP (op, 1);
14964 index = XEXP (op, 0);
14965 tmp = XEXP (op, 1);
14966 if (!CONST_INT_P (tmp))
14968 scale = INTVAL (tmp);
14969 if ((unsigned HOST_WIDE_INT) scale > 3)
14971 scale = 1 << scale;
14976 if (GET_CODE (op) != UNSPEC)
14981 if (XINT (op, 1) == UNSPEC_TP
14982 && TARGET_TLS_DIRECT_SEG_REFS
14983 && seg == ADDR_SPACE_GENERIC)
14984 seg = DEFAULT_TLS_SEG_REG;
14990 if (!REG_P (SUBREG_REG (op)))
15017 else if (GET_CODE (addr) == MULT)
15019 index = XEXP (addr, 0); /* index*scale */
15020 scale_rtx = XEXP (addr, 1);
15022 else if (GET_CODE (addr) == ASHIFT)
15024 /* We're called for lea too, which implements ashift on occasion. */
15025 index = XEXP (addr, 0);
15026 tmp = XEXP (addr, 1);
15027 if (!CONST_INT_P (tmp))
15029 scale = INTVAL (tmp);
15030 if ((unsigned HOST_WIDE_INT) scale > 3)
15032 scale = 1 << scale;
15036 disp = addr; /* displacement */
15042 else if (SUBREG_P (index)
15043 && REG_P (SUBREG_REG (index)))
15049 /* Extract the integral value of scale. */
15052 if (!CONST_INT_P (scale_rtx))
15054 scale = INTVAL (scale_rtx);
15057 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
15058 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
15060 /* Avoid useless 0 displacement. */
15061 if (disp == const0_rtx && (base || index))
15064 /* Allow arg pointer and stack pointer as index if there is no scaling. */
15065 if (base_reg && index_reg && scale == 1
15066 && (REGNO (index_reg) == ARG_POINTER_REGNUM
15067 || REGNO (index_reg) == FRAME_POINTER_REGNUM
15068 || REGNO (index_reg) == SP_REG))
15070 std::swap (base, index);
15071 std::swap (base_reg, index_reg);
15074 /* Special case: %ebp cannot be encoded as a base without a displacement.
15075 Similarly %r13. */
15076 if (!disp && base_reg
15077 && (REGNO (base_reg) == ARG_POINTER_REGNUM
15078 || REGNO (base_reg) == FRAME_POINTER_REGNUM
15079 || REGNO (base_reg) == BP_REG
15080 || REGNO (base_reg) == R13_REG))
15083 /* Special case: on K6, [%esi] makes the instruction vector decoded.
15084 Avoid this by transforming to [%esi+0].
15085 Reload calls address legitimization without cfun defined, so we need
15086 to test cfun for being non-NULL. */
15087 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
15088 && base_reg && !index_reg && !disp
15089 && REGNO (base_reg) == SI_REG)
15092 /* Special case: encode reg+reg instead of reg*2. */
15093 if (!base && index && scale == 2)
15094 base = index, base_reg = index_reg, scale = 1;
15096 /* Special case: scaling cannot be encoded without base or displacement. */
15097 if (!base && !disp && index && scale != 1)
15101 out->index = index;
15103 out->scale = scale;
15109 /* Return the cost of the memory address X.
15110 For i386, it is better to use a complex address than let gcc copy
15111 the address into a reg and make a new pseudo. But not if the address
15112 requires two regs - that would mean more pseudos with longer
15113 lifetimes. */
15115 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
15117 struct ix86_address parts;
15119 int ok = ix86_decompose_address (x, &parts);
15123 if (parts.base && SUBREG_P (parts.base))
15124 parts.base = SUBREG_REG (parts.base);
15125 if (parts.index && SUBREG_P (parts.index))
15126 parts.index = SUBREG_REG (parts.index);
15128 /* Attempt to minimize number of registers in the address by increasing
15129 address cost for each used register. We don't increase address cost
15130 for "pic_offset_table_rtx". When a memop with "pic_offset_table_rtx"
15131 is not invariant itself it most likely means that base or index is not
15132 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
15133 which is not profitable for x86. */
15135 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
15136 && (current_pass->type == GIMPLE_PASS
15137 || !pic_offset_table_rtx
15138 || !REG_P (parts.base)
15139 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
15143 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
15144 && (current_pass->type == GIMPLE_PASS
15145 || !pic_offset_table_rtx
15146 || !REG_P (parts.index)
15147 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
15150 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
15151 since its predecode logic can't detect the length of instructions
15152 and it degenerates to vector decoded. Increase the cost of such
15153 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
15154 to split such addresses or even refuse such addresses at all.
15156 The following addressing modes are affected:
15157 [base+scale*index]
15158 [scale*index+disp]
15159 [base+index]
15161 The first and last case may be avoidable by explicitly coding the zero in
15162 the memory address, but I don't have an AMD-K6 machine handy to check this
15163 theory. */
15166 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
15167 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
15168 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
15174 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
15175 this is used to form addresses to local data when -fPIC is in
15176 effect. */
15179 darwin_local_data_pic (rtx disp)
15181 return (GET_CODE (disp) == UNSPEC
15182 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
15185 /* True if operand X should be loaded from GOT. */
15188 ix86_force_load_from_GOT_p (rtx x)
15190 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15191 && !TARGET_PECOFF && !TARGET_MACHO
15192 && !flag_plt && !flag_pic
15193 && ix86_cmodel != CM_LARGE
15194 && GET_CODE (x) == SYMBOL_REF
15195 && SYMBOL_REF_FUNCTION_P (x)
15196 && !SYMBOL_REF_LOCAL_P (x));
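/* (E.g. with -fno-plt and -fno-pic, a call to an external function
foo can then be emitted as call *foo@GOTPCREL(%rip) instead of
going through the PLT; illustrative, and subject to the assembler
support tested above.) */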
15199 /* Determine if a given RTX is a valid constant. We already know this
15200 satisfies CONSTANT_P. */
15203 ix86_legitimate_constant_p (machine_mode mode, rtx x)
15205 switch (GET_CODE (x))
15210 if (GET_CODE (x) == PLUS)
15212 if (!CONST_INT_P (XEXP (x, 1)))
15217 if (TARGET_MACHO && darwin_local_data_pic (x))
15220 /* Only some unspecs are valid as "constants". */
15221 if (GET_CODE (x) == UNSPEC)
15222 switch (XINT (x, 1))
15225 case UNSPEC_GOTOFF:
15226 case UNSPEC_PLTOFF:
15227 return TARGET_64BIT;
15229 case UNSPEC_NTPOFF:
15230 x = XVECEXP (x, 0, 0);
15231 return (GET_CODE (x) == SYMBOL_REF
15232 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15233 case UNSPEC_DTPOFF:
15234 x = XVECEXP (x, 0, 0);
15235 return (GET_CODE (x) == SYMBOL_REF
15236 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
15241 /* We must have drilled down to a symbol. */
15242 if (GET_CODE (x) == LABEL_REF)
15244 if (GET_CODE (x) != SYMBOL_REF)
15249 /* TLS symbols are never valid. */
15250 if (SYMBOL_REF_TLS_MODEL (x))
15253 /* DLLIMPORT symbols are never valid. */
15254 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15255 && SYMBOL_REF_DLLIMPORT_P (x))
15259 /* mdynamic-no-pic */
15260 if (MACHO_DYNAMIC_NO_PIC_P)
15261 return machopic_symbol_defined_p (x);
15264 /* External function address should be loaded
15265 via the GOT slot to avoid PLT. */
15266 if (ix86_force_load_from_GOT_p (x))
15271 CASE_CONST_SCALAR_INT:
15280 if (!standard_sse_constant_p (x, mode))
15288 if (!standard_sse_constant_p (x, mode))
15295 /* Otherwise we handle everything else in the move patterns. */
15299 /* Determine if it's legal to put X into the constant pool. This
15300 is not possible for the address of thread-local symbols, which
15301 is checked above. */
15304 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
15306 /* We can put any immediate constant in memory. */
15307 switch (GET_CODE (x))
15316 return !ix86_legitimate_constant_p (mode, x);
15319 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
15320 otherwise zero. */
15323 is_imported_p (rtx x)
15325 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
15326 || GET_CODE (x) != SYMBOL_REF)
15329 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
15333 /* Nonzero if the constant value X is a legitimate general operand
15334 when generating PIC code. It is given that flag_pic is on and
15335 that X satisfies CONSTANT_P. */
15338 legitimate_pic_operand_p (rtx x)
15342 switch (GET_CODE (x))
15345 inner = XEXP (x, 0);
15346 if (GET_CODE (inner) == PLUS
15347 && CONST_INT_P (XEXP (inner, 1)))
15348 inner = XEXP (inner, 0);
15350 /* Only some unspecs are valid as "constants". */
15351 if (GET_CODE (inner) == UNSPEC)
15352 switch (XINT (inner, 1))
15355 case UNSPEC_GOTOFF:
15356 case UNSPEC_PLTOFF:
15357 return TARGET_64BIT;
15359 x = XVECEXP (inner, 0, 0);
15360 return (GET_CODE (x) == SYMBOL_REF
15361 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15362 case UNSPEC_MACHOPIC_OFFSET:
15363 return legitimate_pic_address_disp_p (x);
15371 return legitimate_pic_address_disp_p (x);
15378 /* Determine if a given CONST RTX is a valid memory displacement
15379 in PIC mode. */
15382 legitimate_pic_address_disp_p (rtx disp)
15386 /* In 64bit mode we can allow direct addresses of symbols and labels
15387 when they are not dynamic symbols. */
15390 rtx op0 = disp, op1;
15392 switch (GET_CODE (disp))
15398 if (GET_CODE (XEXP (disp, 0)) != PLUS)
15400 op0 = XEXP (XEXP (disp, 0), 0);
15401 op1 = XEXP (XEXP (disp, 0), 1);
15402 if (!CONST_INT_P (op1))
15404 if (GET_CODE (op0) == UNSPEC
15405 && (XINT (op0, 1) == UNSPEC_DTPOFF
15406 || XINT (op0, 1) == UNSPEC_NTPOFF)
15407 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
15409 if (INTVAL (op1) >= 16*1024*1024
15410 || INTVAL (op1) < -16*1024*1024)
15412 if (GET_CODE (op0) == LABEL_REF)
15414 if (GET_CODE (op0) == CONST
15415 && GET_CODE (XEXP (op0, 0)) == UNSPEC
15416 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
15418 if (GET_CODE (op0) == UNSPEC
15419 && XINT (op0, 1) == UNSPEC_PCREL)
15421 if (GET_CODE (op0) != SYMBOL_REF)
15426 /* TLS references should always be enclosed in UNSPEC.
15427 The dllimported symbol always needs to be resolved. */
15428 if (SYMBOL_REF_TLS_MODEL (op0)
15429 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
15434 if (is_imported_p (op0))
15437 if (SYMBOL_REF_FAR_ADDR_P (op0)
15438 || !SYMBOL_REF_LOCAL_P (op0))
15441 /* Function symbols need to be resolved only for
15442 the large model.
15443 For the small model we don't need to resolve anything
15444 here. */
15445 if ((ix86_cmodel != CM_LARGE_PIC
15446 && SYMBOL_REF_FUNCTION_P (op0))
15447 || ix86_cmodel == CM_SMALL_PIC)
15449 /* Non-external symbols don't need to be resolved for
15450 the large and medium models. */
15451 if ((ix86_cmodel == CM_LARGE_PIC
15452 || ix86_cmodel == CM_MEDIUM_PIC)
15453 && !SYMBOL_REF_EXTERNAL_P (op0))
15456 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
15457 && (SYMBOL_REF_LOCAL_P (op0)
15458 || (HAVE_LD_PIE_COPYRELOC
15460 && !SYMBOL_REF_WEAK (op0)
15461 && !SYMBOL_REF_FUNCTION_P (op0)))
15462 && ix86_cmodel != CM_LARGE_PIC)
15470 if (GET_CODE (disp) != CONST)
15472 disp = XEXP (disp, 0);
15476 /* It is unsafe to allow PLUS expressions; this limits the allowed
15477 distance of GOT tables. We should not need these anyway. */
15478 if (GET_CODE (disp) != UNSPEC
15479 || (XINT (disp, 1) != UNSPEC_GOTPCREL
15480 && XINT (disp, 1) != UNSPEC_GOTOFF
15481 && XINT (disp, 1) != UNSPEC_PCREL
15482 && XINT (disp, 1) != UNSPEC_PLTOFF))
15485 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
15486 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
15492 if (GET_CODE (disp) == PLUS)
15494 if (!CONST_INT_P (XEXP (disp, 1)))
15496 disp = XEXP (disp, 0);
15500 if (TARGET_MACHO && darwin_local_data_pic (disp))
15503 if (GET_CODE (disp) != UNSPEC)
15506 switch (XINT (disp, 1))
15511 /* We need to check for both symbols and labels because VxWorks loads
15512 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
15513 details. */
15514 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15515 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
15516 case UNSPEC_GOTOFF:
15517 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
15518 While the ABI also specifies a 32bit relocation, we don't produce
15519 it in the small PIC model at all. */
15520 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15521 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
15523 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
15525 case UNSPEC_GOTTPOFF:
15526 case UNSPEC_GOTNTPOFF:
15527 case UNSPEC_INDNTPOFF:
15530 disp = XVECEXP (disp, 0, 0);
15531 return (GET_CODE (disp) == SYMBOL_REF
15532 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
15533 case UNSPEC_NTPOFF:
15534 disp = XVECEXP (disp, 0, 0);
15535 return (GET_CODE (disp) == SYMBOL_REF
15536 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
15537 case UNSPEC_DTPOFF:
15538 disp = XVECEXP (disp, 0, 0);
15539 return (GET_CODE (disp) == SYMBOL_REF
15540 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
15546 /* Determine if OP is a suitable RTX for an address register.
15547 Return the naked register if a register or a register subreg is
15548 found, otherwise return NULL_RTX. */
15551 ix86_validate_address_register (rtx op)
15553 machine_mode mode = GET_MODE (op);
15555 /* Only SImode or DImode registers can form the address. */
15556 if (mode != SImode && mode != DImode)
15561 else if (SUBREG_P (op))
15563 rtx reg = SUBREG_REG (op);
15568 mode = GET_MODE (reg);
15570 /* Don't allow SUBREGs that span more than a word. It can
15571 lead to spill failures when the register is one word out
15572 of a two word structure. */
15573 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15576 /* Allow only SUBREGs of non-eliminable hard registers. */
15577 if (register_no_elim_operand (reg, mode))
15581 /* Op is not a register. */
15585 /* Recognizes RTL expressions that are valid memory addresses for an
15586 instruction. The MODE argument is the machine mode for the MEM
15587 expression that wants to use this address.
15589 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
15590 convert common non-canonical forms to canonical form so that they will
15591 be recognized. */
15594 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
15596 struct ix86_address parts;
15597 rtx base, index, disp;
15598 HOST_WIDE_INT scale;
15601 if (ix86_decompose_address (addr, &parts) <= 0)
15602 /* Decomposition failed. */
15606 index = parts.index;
15608 scale = parts.scale;
15611 /* Validate base register. */
15614 rtx reg = ix86_validate_address_register (base);
15616 if (reg == NULL_RTX)
15619 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
15620 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
15621 /* Base is not valid. */
15625 /* Validate index register. */
15628 rtx reg = ix86_validate_address_register (index);
15630 if (reg == NULL_RTX)
15633 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
15634 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
15635 /* Index is not valid. */
15639 /* Index and base should have the same mode. */
15641 && GET_MODE (base) != GET_MODE (index))
15644 /* Address override works only on the (%reg) part of %fs:(%reg). */
15645 if (seg != ADDR_SPACE_GENERIC
15646 && ((base && GET_MODE (base) != word_mode)
15647 || (index && GET_MODE (index) != word_mode)))
15650 /* Validate scale factor. */
15654 /* Scale without index. */
15657 if (scale != 2 && scale != 4 && scale != 8)
15658 /* Scale is not a valid multiplier. */
15662 /* Validate displacement. */
15665 if (GET_CODE (disp) == CONST
15666 && GET_CODE (XEXP (disp, 0)) == UNSPEC
15667 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
15668 switch (XINT (XEXP (disp, 0), 1))
15670 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
15671 when used. While the ABI also specifies 32bit relocations, we
15672 don't produce them at all and use IP-relative addressing instead.
15673 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
15674 should be loaded via the GOT. */
15677 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15678 goto is_legitimate_pic;
15680 case UNSPEC_GOTOFF:
15681 gcc_assert (flag_pic);
15683 goto is_legitimate_pic;
15685 /* 64bit address unspec. */
15688 case UNSPEC_GOTPCREL:
15689 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15690 goto is_legitimate_pic;
15693 gcc_assert (flag_pic);
15694 goto is_legitimate_pic;
15696 case UNSPEC_GOTTPOFF:
15697 case UNSPEC_GOTNTPOFF:
15698 case UNSPEC_INDNTPOFF:
15699 case UNSPEC_NTPOFF:
15700 case UNSPEC_DTPOFF:
15704 /* Invalid address unspec. */
15708 else if (SYMBOLIC_CONST (disp)
15712 && MACHOPIC_INDIRECT
15713 && !machopic_operand_p (disp)
15719 if (TARGET_64BIT && (index || base))
15721 /* foo@dtpoff(%rX) is ok. */
15722 if (GET_CODE (disp) != CONST
15723 || GET_CODE (XEXP (disp, 0)) != PLUS
15724 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
15725 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
15726 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
15727 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
15728 /* Non-constant pic memory reference. */
15731 else if ((!TARGET_MACHO || flag_pic)
15732 && ! legitimate_pic_address_disp_p (disp))
15733 /* Displacement is an invalid pic construct. */
15736 else if (MACHO_DYNAMIC_NO_PIC_P
15737 && !ix86_legitimate_constant_p (Pmode, disp))
15738 /* Displacement must be referenced via non_lazy_pointer. */
15742 /* This code used to verify that a symbolic pic displacement
15743 includes the pic_offset_table_rtx register.
15745 While this is a good idea, unfortunately these constructs may
15746 be created by the "adds using lea" optimization for incorrect
15755 code. Such code is nonsensical, but results in addressing the
15756 GOT table with a pic_offset_table_rtx base. We can't just
15757 refuse it easily, since it gets matched by the "addsi3"
15758 pattern, which later gets split to lea in the case the
15759 output register differs from the input. While this could be
15760 handled by a separate addsi pattern for this case that never
15761 results in lea, disabling this test seems to be the easier
15762 and correct fix for the crash. */
15764 else if (GET_CODE (disp) != LABEL_REF
15765 && !CONST_INT_P (disp)
15766 && (GET_CODE (disp) != CONST
15767 || !ix86_legitimate_constant_p (Pmode, disp))
15768 && (GET_CODE (disp) != SYMBOL_REF
15769 || !ix86_legitimate_constant_p (Pmode, disp)))
15770 /* Displacement is not constant. */
15772 else if (TARGET_64BIT
15773 && !x86_64_immediate_operand (disp, VOIDmode))
15774 /* Displacement is out of range. */
15776 /* In x32 mode, constant addresses are sign extended to 64bit, so
15777 we have to prevent addresses from 0x80000000 to 0xffffffff. */
15778 else if (TARGET_X32 && !(index || base)
15779 && CONST_INT_P (disp)
15780 && val_signbit_known_set_p (SImode, INTVAL (disp)))
15784 /* Everything looks valid. */
15788 /* Determine if a given RTX is a valid constant address. */
15791 constant_address_p (rtx x)
15793 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
15796 /* Return a unique alias set for the GOT. */
15798 static alias_set_type
15799 ix86_GOT_alias_set (void)
15801 static alias_set_type set = -1;
15802 if (set == -1)
15803 set = new_alias_set ();
15804 return set;
15807 /* Return a legitimate reference for ORIG (an address) using the
15808 register REG. If REG is 0, a new pseudo is generated.
15810 There are two types of references that must be handled:
15812 1. Global data references must load the address from the GOT, via
15813 the PIC reg. An insn is emitted to do this load, and the reg is
15816 2. Static data references, constant pool addresses, and code labels
15817 compute the address as an offset from the GOT, whose base is in
15818 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
15819 differentiate them from global data objects. The returned
15820 address is the PIC reg + an unspec constant.
15822 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
15823 reg also appears in the address. */
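/* (Illustrative ia32 forms of the two cases, with %ebx holding the
PIC register: a global symbol's address is loaded from its GOT
slot as
movl foo@GOT(%ebx), %eax
while a local symbol's address is computed directly as
leal bar@GOTOFF(%ebx), %eax.) */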
15826 legitimize_pic_address (rtx orig, rtx reg)
15829 rtx new_rtx = orig;
15832 if (TARGET_MACHO && !TARGET_64BIT)
15835 reg = gen_reg_rtx (Pmode);
15836 /* Use the generic Mach-O PIC machinery. */
15837 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
15841 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15843 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15848 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
15850 else if ((!TARGET_64BIT
15851 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
15853 && gotoff_operand (addr, Pmode))
15855 /* This symbol may be referenced via a displacement
15856 from the PIC base address (@GOTOFF). */
15857 if (GET_CODE (addr) == CONST)
15858 addr = XEXP (addr, 0);
15860 if (GET_CODE (addr) == PLUS)
15862 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15864 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15867 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15869 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15872 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
15876 gcc_assert (REG_P (reg));
15877 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
15878 new_rtx, reg, 1, OPTAB_DIRECT);
15881 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15883 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
15884 /* We can't use @GOTOFF for text labels
15885 on VxWorks, see gotoff_operand. */
15886 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
15888 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15892 /* For x64 PE-COFF there is no GOT table,
15893 so we use the address directly. */
15894 if (TARGET_64BIT && TARGET_PECOFF)
15896 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
15897 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15899 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
15901 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
15903 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15904 new_rtx = gen_const_mem (Pmode, new_rtx);
15905 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15909 /* This symbol must be referenced via a load
15910 from the Global Offset Table (@GOT). */
15911 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
15912 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15914 new_rtx = force_reg (Pmode, new_rtx);
15915 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15916 new_rtx = gen_const_mem (Pmode, new_rtx);
15917 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15920 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
15924 if (CONST_INT_P (addr)
15925 && !x86_64_immediate_operand (addr, VOIDmode))
15926 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
15927 else if (GET_CODE (addr) == CONST)
15929 addr = XEXP (addr, 0);
15931 /* We must match stuff we generate before. Assume the only
15932 unspecs that can get here are ours. Not that we could do
15933 anything with them anyway.... */
15934 if (GET_CODE (addr) == UNSPEC
15935 || (GET_CODE (addr) == PLUS
15936 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
15938 gcc_assert (GET_CODE (addr) == PLUS);
15941 if (GET_CODE (addr) == PLUS)
15943 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
15945 /* Check first to see if this is a constant
15946 offset from a @GOTOFF symbol reference. */
15948 && gotoff_operand (op0, Pmode)
15949 && CONST_INT_P (op1))
15953 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
15955 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
15956 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15960 gcc_assert (REG_P (reg));
15961 new_rtx = expand_simple_binop (Pmode, PLUS,
15962 pic_offset_table_rtx,
15968 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15972 if (INTVAL (op1) < -16*1024*1024
15973 || INTVAL (op1) >= 16*1024*1024)
15975 if (!x86_64_immediate_operand (op1, Pmode))
15976 op1 = force_reg (Pmode, op1);
15979 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
15985 rtx base = legitimize_pic_address (op0, reg);
15986 machine_mode mode = GET_MODE (base);
15988 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
15990 if (CONST_INT_P (new_rtx))
15992 if (INTVAL (new_rtx) < -16*1024*1024
15993 || INTVAL (new_rtx) >= 16*1024*1024)
15995 if (!x86_64_immediate_operand (new_rtx, mode))
15996 new_rtx = force_reg (mode, new_rtx);
15999 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
16002 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
16006 /* For %rip addressing, we have to use
16007 just disp32, not base nor index. */
16009 && (GET_CODE (base) == SYMBOL_REF
16010 || GET_CODE (base) == LABEL_REF))
16011 base = force_reg (mode, base);
16012 if (GET_CODE (new_rtx) == PLUS
16013 && CONSTANT_P (XEXP (new_rtx, 1)))
16015 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
16016 new_rtx = XEXP (new_rtx, 1);
16018 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
16026 /* Load the thread pointer. If TO_REG is true, force it into a register. */
16029 get_thread_pointer (machine_mode tp_mode, bool to_reg)
16031 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
16033 if (GET_MODE (tp) != tp_mode)
16035 gcc_assert (GET_MODE (tp) == SImode);
16036 gcc_assert (tp_mode == DImode);
16038 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
16042 tp = copy_to_mode_reg (tp_mode, tp);
16047 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16049 static GTY(()) rtx ix86_tls_symbol;
16052 ix86_tls_get_addr (void)
16054 if (!ix86_tls_symbol)
16057 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
16058 ? "___tls_get_addr" : "__tls_get_addr");
16060 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
16063 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
16065 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
16067 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
16068 gen_rtx_CONST (Pmode, unspec));
16071 return ix86_tls_symbol;
16074 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16076 static GTY(()) rtx ix86_tls_module_base_symbol;
16079 ix86_tls_module_base (void)
16081 if (!ix86_tls_module_base_symbol)
16083 ix86_tls_module_base_symbol
16084 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
16086 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16087 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16090 return ix86_tls_module_base_symbol;
16093 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
16094 false if we expect this to be used for a memory address and true if
16095 we expect to load the address into a register. */
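/* (For orientation only -- typical x86-64 GNU/Linux sequences the
models expand to, before any linker relaxation:
global dynamic: lea x@tlsgd(%rip), %rdi; call __tls_get_addr@PLT
local dynamic: lea x@tlsld(%rip), %rdi; call __tls_get_addr@PLT,
then x@dtpoff(%rax)
initial exec: mov x@gottpoff(%rip), %reg, then %fs:(%reg)
local exec: %fs:x@tpoff directly.) */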
16098 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
16100 rtx dest, base, off;
16101 rtx pic = NULL_RTX, tp = NULL_RTX;
16102 machine_mode tp_mode = Pmode;
16105 /* Fall back to the global dynamic model if the tool chain cannot
16106 support local dynamic. */
16107 if (TARGET_SUN_TLS && !TARGET_64BIT
16108 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
16109 && model == TLS_MODEL_LOCAL_DYNAMIC)
16110 model = TLS_MODEL_GLOBAL_DYNAMIC;
16114 case TLS_MODEL_GLOBAL_DYNAMIC:
16115 dest = gen_reg_rtx (Pmode);
16119 if (flag_pic && !TARGET_PECOFF)
16120 pic = pic_offset_table_rtx;
16123 pic = gen_reg_rtx (Pmode);
16124 emit_insn (gen_set_got (pic));
16128 if (TARGET_GNU2_TLS)
16131 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
16133 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
16135 tp = get_thread_pointer (Pmode, true);
16136 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
16138 if (GET_MODE (x) != Pmode)
16139 x = gen_rtx_ZERO_EXTEND (Pmode, x);
16141 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
16145 rtx caddr = ix86_tls_get_addr ();
16149 rtx rax = gen_rtx_REG (Pmode, AX_REG);
16154 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
16155 insns = get_insns ();
16158 if (GET_MODE (x) != Pmode)
16159 x = gen_rtx_ZERO_EXTEND (Pmode, x);
16161 RTL_CONST_CALL_P (insns) = 1;
16162 emit_libcall_block (insns, dest, rax, x);
16165 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
16169 case TLS_MODEL_LOCAL_DYNAMIC:
16170 base = gen_reg_rtx (Pmode);
16175 pic = pic_offset_table_rtx;
16178 pic = gen_reg_rtx (Pmode);
16179 emit_insn (gen_set_got (pic));
16183 if (TARGET_GNU2_TLS)
16185 rtx tmp = ix86_tls_module_base ();
16188 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
16190 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
16192 tp = get_thread_pointer (Pmode, true);
16193 set_unique_reg_note (get_last_insn (), REG_EQUAL,
16194 gen_rtx_MINUS (Pmode, tmp, tp));
16198 rtx caddr = ix86_tls_get_addr ();
16202 rtx rax = gen_rtx_REG (Pmode, AX_REG);
16208 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
16209 insns = get_insns ();
16212 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
16213 share the LD_BASE result with other LD model accesses. */
16214 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
16215 UNSPEC_TLS_LD_BASE);
16217 RTL_CONST_CALL_P (insns) = 1;
16218 emit_libcall_block (insns, base, rax, eqv);
16221 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
16224 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
16225 off = gen_rtx_CONST (Pmode, off);
16227 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
16229 if (TARGET_GNU2_TLS)
16231 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
16233 if (GET_MODE (x) != Pmode)
16234 x = gen_rtx_ZERO_EXTEND (Pmode, x);
16236 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
16240 case TLS_MODEL_INITIAL_EXEC:
16243 if (TARGET_SUN_TLS && !TARGET_X32)
16245 /* The Sun linker took the AMD64 TLS spec literally
16246 and can only handle %rax as destination of the
16247 initial executable code sequence. */
16249 dest = gen_reg_rtx (DImode);
16250 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
16254 /* Generate DImode references to avoid %fs:(%reg32)
16255 problems and the linker IE->LE relaxation bug. */
16258 type = UNSPEC_GOTNTPOFF;
16262 pic = pic_offset_table_rtx;
16263 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
16265 else if (!TARGET_ANY_GNU_TLS)
16267 pic = gen_reg_rtx (Pmode);
16268 emit_insn (gen_set_got (pic));
16269 type = UNSPEC_GOTTPOFF;
16274 type = UNSPEC_INDNTPOFF;
16277 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
16278 off = gen_rtx_CONST (tp_mode, off);
16280 off = gen_rtx_PLUS (tp_mode, pic, off);
16281 off = gen_const_mem (tp_mode, off);
16282 set_mem_alias_set (off, ix86_GOT_alias_set ());
16284 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16286 base = get_thread_pointer (tp_mode,
16287 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16288 off = force_reg (tp_mode, off);
16289 dest = gen_rtx_PLUS (tp_mode, base, off);
16290 if (tp_mode != Pmode)
16291 dest = convert_to_mode (Pmode, dest, 1);
16295 base = get_thread_pointer (Pmode, true);
16296 dest = gen_reg_rtx (Pmode);
16297 emit_insn (ix86_gen_sub3 (dest, base, off));
16301 case TLS_MODEL_LOCAL_EXEC:
16302 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
16303 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16304 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
16305 off = gen_rtx_CONST (Pmode, off);
16307 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16309 base = get_thread_pointer (Pmode,
16310 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16311 return gen_rtx_PLUS (Pmode, base, off);
16315 base = get_thread_pointer (Pmode, true);
16316 dest = gen_reg_rtx (Pmode);
16317 emit_insn (ix86_gen_sub3 (dest, base, off));
16322 gcc_unreachable ();
16328 /* Return true if OP refers to a TLS address. */
16330 ix86_tls_address_pattern_p (rtx op)
16332 subrtx_var_iterator::array_type array;
16333 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
16338 rtx *x = &XEXP (op, 0);
16339 while (GET_CODE (*x) == PLUS)
16342 for (i = 0; i < 2; i++)
16344 rtx u = XEXP (*x, i);
16345 if (GET_CODE (u) == ZERO_EXTEND)
16347 if (GET_CODE (u) == UNSPEC
16348 && XINT (u, 1) == UNSPEC_TP)
16354 iter.skip_subrtxes ();
16361 /* Rewrite *LOC so that it refers to a default TLS address space. */
16363 ix86_rewrite_tls_address_1 (rtx *loc)
16365 subrtx_ptr_iterator::array_type array;
16366 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
16371 rtx addr = XEXP (*loc, 0);
16373 while (GET_CODE (*x) == PLUS)
16376 for (i = 0; i < 2; i++)
16378 rtx u = XEXP (*x, i);
16379 if (GET_CODE (u) == ZERO_EXTEND)
16381 if (GET_CODE (u) == UNSPEC
16382 && XINT (u, 1) == UNSPEC_TP)
16384 addr_space_t as = DEFAULT_TLS_SEG_REG;
16386 *x = XEXP (*x, 1 - i);
16388 *loc = replace_equiv_address_nv (*loc, addr, true);
16389 set_mem_addr_space (*loc, as);
16396 iter.skip_subrtxes ();
16401 /* Rewrite an instruction pattern involving a TLS address
16402 so that it refers to the default TLS address space. */
16404 ix86_rewrite_tls_address (rtx pattern)
16406 pattern = copy_insn (pattern);
16407 ix86_rewrite_tls_address_1 (&pattern);
16411 /* Create or return the unique __imp_DECL dllimport symbol corresponding
16412 to symbol DECL if BEIMPORT is true. Otherwise create or return the
16413 unique refptr-DECL symbol corresponding to symbol DECL. */
16415 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
16417 static inline hashval_t hash (tree_map *m) { return m->hash; }
16419 equal (tree_map *a, tree_map *b)
16421 return a->base.from == b->base.from;
16425 keep_cache_entry (tree_map *&m)
16427 return ggc_marked_p (m->base.from);
16431 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
16434 get_dllimport_decl (tree decl, bool beimport)
16436 struct tree_map *h, in;
16438 const char *prefix;
16439 size_t namelen, prefixlen;
16444 if (!dllimport_map)
16445 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
16447 in.hash = htab_hash_pointer (decl);
16448 in.base.from = decl;
16449 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
16454 *loc = h = ggc_alloc<tree_map> ();
16456 h->base.from = decl;
16457 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
16458 VAR_DECL, NULL, ptr_type_node);
16459 DECL_ARTIFICIAL (to) = 1;
16460 DECL_IGNORED_P (to) = 1;
16461 DECL_EXTERNAL (to) = 1;
16462 TREE_READONLY (to) = 1;
16464 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
16465 name = targetm.strip_name_encoding (name);
16467 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
16468 ? "*__imp_" : "*__imp__";
16470 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
16471 namelen = strlen (name);
16472 prefixlen = strlen (prefix);
16473 imp_name = (char *) alloca (namelen + prefixlen + 1);
16474 memcpy (imp_name, prefix, prefixlen);
16475 memcpy (imp_name + prefixlen, name, namelen + 1);
16477 name = ggc_alloc_string (imp_name, namelen + prefixlen);
16478 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
16479 SET_SYMBOL_REF_DECL (rtl, to);
16480 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
16483 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
16484 #ifdef SUB_TARGET_RECORD_STUB
16485 SUB_TARGET_RECORD_STUB (name);
16489 rtl = gen_const_mem (Pmode, rtl);
16490 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
16492 SET_DECL_RTL (to, rtl);
16493 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
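/* (E.g. a dllimported function foo on 32-bit Windows gets the stub
symbol __imp__foo, per the prefix selection above; loading through
it yields foo's real address at run time.) */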
16498 /* Expand SYMBOL into its corresponding far-address symbol.
16499 WANT_REG is true if we require the result be a register. */
16502 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
16507 gcc_assert (SYMBOL_REF_DECL (symbol));
16508 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
16510 x = DECL_RTL (imp_decl);
16512 x = force_reg (Pmode, x);
16516 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
16517 true if we require the result be a register. */
16520 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
16525 gcc_assert (SYMBOL_REF_DECL (symbol));
16526 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
16528 x = DECL_RTL (imp_decl);
16530 x = force_reg (Pmode, x);
16534 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
16535 is true if we require the result be a register. */
16538 legitimize_pe_coff_symbol (rtx addr, bool inreg)
16540 if (!TARGET_PECOFF)
16543 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16545 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
16546 return legitimize_dllimport_symbol (addr, inreg);
16547 if (GET_CODE (addr) == CONST
16548 && GET_CODE (XEXP (addr, 0)) == PLUS
16549 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
16550 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
16552 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
16553 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
16557 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
16559 if (GET_CODE (addr) == SYMBOL_REF
16560 && !is_imported_p (addr)
16561 && SYMBOL_REF_EXTERNAL_P (addr)
16562 && SYMBOL_REF_DECL (addr))
16563 return legitimize_pe_coff_extern_decl (addr, inreg);
16565 if (GET_CODE (addr) == CONST
16566 && GET_CODE (XEXP (addr, 0)) == PLUS
16567 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
16568 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
16569 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
16570 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
16572 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
16573 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
16578 /* Try machine-dependent ways of modifying an illegitimate address
16579 to be legitimate. If we find one, return the new, valid address.
16580 This macro is used in only one place: `memory_address' in explow.c.
16582 OLDX is the address as it was before break_out_memory_refs was called.
16583 In some cases it is useful to look at this to decide what needs to be done.
16585 It is always safe for this macro to do nothing. It exists to recognize
16586 opportunities to optimize the output.
16588 For the 80386, we handle X+REG by loading X into a register R and
16589 using R+REG. R will go in a general reg and indexing will be used.
16590 However, if REG is a broken-out memory address or multiplication,
16591 nothing needs to be done because REG can certainly go in a general reg.
16593 When -fpic is used, special handling is needed for symbolic references.
16594 See comments by legitimize_pic_address in i386.c for details. */
16597 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
16599 bool changed = false;
16602 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
16603 if (log)
16604 return legitimize_tls_address (x, (enum tls_model) log, false);
16605 if (GET_CODE (x) == CONST
16606 && GET_CODE (XEXP (x, 0)) == PLUS
16607 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
16608 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
16610 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
16611 (enum tls_model) log, false);
16612 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
16615 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16617 rtx tmp = legitimize_pe_coff_symbol (x, true);
16622 if (flag_pic && SYMBOLIC_CONST (x))
16623 return legitimize_pic_address (x, 0);
16626 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
16627 return machopic_indirect_data_reference (x, 0);
16630 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
16631 if (GET_CODE (x) == ASHIFT
16632 && CONST_INT_P (XEXP (x, 1))
16633 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
16636 log = INTVAL (XEXP (x, 1));
16637 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
16638 GEN_INT (1 << log));
16641 if (GET_CODE (x) == PLUS)
16643 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
16645 if (GET_CODE (XEXP (x, 0)) == ASHIFT
16646 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16647 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
16650 log = INTVAL (XEXP (XEXP (x, 0), 1));
16651 XEXP (x, 0) = gen_rtx_MULT (Pmode,
16652 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
16653 GEN_INT (1 << log));
16656 if (GET_CODE (XEXP (x, 1)) == ASHIFT
16657 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
16658 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
16661 log = INTVAL (XEXP (XEXP (x, 1), 1));
16662 XEXP (x, 1) = gen_rtx_MULT (Pmode,
16663 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
16664 GEN_INT (1 << log));
16667 /* Put multiply first if it isn't already. */
16668 if (GET_CODE (XEXP (x, 1)) == MULT)
16670 std::swap (XEXP (x, 0), XEXP (x, 1));
16674 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
16675 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
16676 created by virtual register instantiation, register elimination, and
16677 similar optimizations. */
16678 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
16681 x = gen_rtx_PLUS (Pmode,
16682 gen_rtx_PLUS (Pmode, XEXP (x, 0),
16683 XEXP (XEXP (x, 1), 0)),
16684 XEXP (XEXP (x, 1), 1));
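/* (E.g. (4*eax) + (ebp + 12) becomes ((4*eax) + ebp) + 12, which
matches the machine addressing mode 12(%ebp,%eax,4).) */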
16687 /* Canonicalize
16688 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
16689 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
16690 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
16691 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16692 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
16693 && CONSTANT_P (XEXP (x, 1)))
16696 rtx other = NULL_RTX;
16698 if (CONST_INT_P (XEXP (x, 1)))
16700 constant = XEXP (x, 1);
16701 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
16703 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
16705 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
16706 other = XEXP (x, 1);
16714 x = gen_rtx_PLUS (Pmode,
16715 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
16716 XEXP (XEXP (XEXP (x, 0), 1), 0)),
16717 plus_constant (Pmode, other,
16718 INTVAL (constant)));
16722 if (changed && ix86_legitimate_address_p (mode, x, false))
16725 if (GET_CODE (XEXP (x, 0)) == MULT)
16728 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
16731 if (GET_CODE (XEXP (x, 1)) == MULT)
16734 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
16738 && REG_P (XEXP (x, 1))
16739 && REG_P (XEXP (x, 0)))
16742 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
16745 x = legitimize_pic_address (x, 0);
16748 if (changed && ix86_legitimate_address_p (mode, x, false))
16751 if (REG_P (XEXP (x, 0)))
16753 rtx temp = gen_reg_rtx (Pmode);
16754 rtx val = force_operand (XEXP (x, 1), temp);
16757 val = convert_to_mode (Pmode, val, 1);
16758 emit_move_insn (temp, val);
16761 XEXP (x, 1) = temp;
16765 else if (REG_P (XEXP (x, 1)))
16767 rtx temp = gen_reg_rtx (Pmode);
16768 rtx val = force_operand (XEXP (x, 0), temp);
16771 val = convert_to_mode (Pmode, val, 1);
16772 emit_move_insn (temp, val);
16775 XEXP (x, 0) = temp;
16783 /* Print an integer constant expression in assembler syntax. Addition
16784 and subtraction are the only arithmetic that may appear in these
16785 expressions. FILE is the stdio stream to write to, X is the rtx, and
16786 CODE is the operand print code from the output string. */
16789 output_pic_addr_const (FILE *file, rtx x, int code)
16793 switch (GET_CODE (x))
16796 gcc_assert (flag_pic);
16801 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
16802 output_addr_const (file, x);
16805 const char *name = XSTR (x, 0);
16807 /* Mark the decl as referenced so that cgraph will
16808 output the function. */
16809 if (SYMBOL_REF_DECL (x))
16810 mark_decl_referenced (SYMBOL_REF_DECL (x));
16813 if (MACHOPIC_INDIRECT
16814 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
16815 name = machopic_indirection_name (x, /*stub_p=*/true);
16817 assemble_name (file, name);
16819 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
16820 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
16821 fputs ("@PLT", file);
16828 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
16829 assemble_name (asm_out_file, buf);
16833 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16837 /* This used to output parentheses around the expression,
16838 but that does not work on the 386 (either ATT or BSD assembler). */
16839 output_pic_addr_const (file, XEXP (x, 0), code);
16843 /* We can't handle floating point constants;
16844 TARGET_PRINT_OPERAND must handle them. */
16845 output_operand_lossage ("floating constant misused");
16849 /* Some assemblers need integer constants to appear first. */
16850 if (CONST_INT_P (XEXP (x, 0)))
16852 output_pic_addr_const (file, XEXP (x, 0), code);
16854 output_pic_addr_const (file, XEXP (x, 1), code);
16858 gcc_assert (CONST_INT_P (XEXP (x, 1)));
16859 output_pic_addr_const (file, XEXP (x, 1), code);
16861 output_pic_addr_const (file, XEXP (x, 0), code);
16867 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
16868 output_pic_addr_const (file, XEXP (x, 0), code);
16870 output_pic_addr_const (file, XEXP (x, 1), code);
16872 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
16876 gcc_assert (XVECLEN (x, 0) == 1);
16877 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
16878 switch (XINT (x, 1))
16881 fputs ("@GOT", file);
16883 case UNSPEC_GOTOFF:
16884 fputs ("@GOTOFF", file);
16886 case UNSPEC_PLTOFF:
16887 fputs ("@PLTOFF", file);
16890 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16891 "(%rip)" : "[rip]", file);
16893 case UNSPEC_GOTPCREL:
16894 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16895 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
16897 case UNSPEC_GOTTPOFF:
16898 /* FIXME: This might be @TPOFF in Sun ld too. */
16899 fputs ("@gottpoff", file);
16902 fputs ("@tpoff", file);
16904 case UNSPEC_NTPOFF:
16906 fputs ("@tpoff", file);
16908 fputs ("@ntpoff", file);
16910 case UNSPEC_DTPOFF:
16911 fputs ("@dtpoff", file);
16913 case UNSPEC_GOTNTPOFF:
16915 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16916 "@gottpoff(%rip)": "@gottpoff[rip]", file);
16918 fputs ("@gotntpoff", file);
16920 case UNSPEC_INDNTPOFF:
16921 fputs ("@indntpoff", file);
16924 case UNSPEC_MACHOPIC_OFFSET:
16926 machopic_output_function_base_name (file);
16930 output_operand_lossage ("invalid UNSPEC as operand");
16936 output_operand_lossage ("invalid expression as operand");
16940 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
16941 We need to emit DTP-relative relocations. */
16943 static void ATTRIBUTE_UNUSED
16944 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
16946 fputs (ASM_LONG, file);
16947 output_addr_const (file, x);
16948 fputs ("@dtpoff", file);
16954 fputs (", 0", file);
16957 gcc_unreachable ();
16961 /* Return true if X is a representation of the PIC register. This copes
16962 with calls from ix86_find_base_term, where the register might have
16963 been replaced by a cselib value. */
16966 ix86_pic_register_p (rtx x)
16968 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
16969 return (pic_offset_table_rtx
16970 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
16971 else if (!REG_P (x))
16973 else if (pic_offset_table_rtx)
16975 if (REGNO (x) == REGNO (pic_offset_table_rtx))
16977 if (HARD_REGISTER_P (x)
16978 && !HARD_REGISTER_P (pic_offset_table_rtx)
16979 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
16984 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
16987 /* Helper function for ix86_delegitimize_address.
16988 Attempt to delegitimize TLS local-exec accesses. */
16991 ix86_delegitimize_tls_address (rtx orig_x)
16993 rtx x = orig_x, unspec;
16994 struct ix86_address addr;
16996 if (!TARGET_TLS_DIRECT_SEG_REFS)
17000 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
17002 if (ix86_decompose_address (x, &addr) == 0
17003 || addr.seg != DEFAULT_TLS_SEG_REG
17004 || addr.disp == NULL_RTX
17005 || GET_CODE (addr.disp) != CONST)
17007 unspec = XEXP (addr.disp, 0);
17008 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
17009 unspec = XEXP (unspec, 0);
17010 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
17012 x = XVECEXP (unspec, 0, 0);
17013 gcc_assert (GET_CODE (x) == SYMBOL_REF);
17014 if (unspec != XEXP (addr.disp, 0))
17015 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
17018 rtx idx = addr.index;
17019 if (addr.scale != 1)
17020 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
17021 x = gen_rtx_PLUS (Pmode, idx, x);
17024 x = gen_rtx_PLUS (Pmode, addr.base, x);
17025 if (MEM_P (orig_x))
17026 x = replace_equiv_address_nv (orig_x, x);
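/* Sketch of what this undoes: a TLS local-exec access such as

     movl  %gs:x@ntpoff, %eax       (32-bit; %fs on 64-bit)

   reaches here as a DEFAULT_TLS_SEG_REG address whose displacement
   wraps the symbol in UNSPEC_NTPOFF; the code above strips the UNSPEC
   and rebuilds base, index*scale and constant offset around the bare
   SYMBOL_REF so the callers see "x" again.  */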
17030 /* In the name of slightly smaller debug output, and to cater to
17031 general assembler lossage, recognize PIC+GOTOFF and turn it back
17032 into a direct symbol reference.
17034 On Darwin, this is necessary to avoid a crash, because Darwin
17035 has a different PIC label for each routine but the DWARF debugging
17036 information is not associated with any particular routine, so it's
17037 necessary to remove references to the PIC label from RTL stored by
17038 the DWARF output code.
17040 This helper is used in the normal ix86_delegitimize_address
17041 entrypoint (e.g. used in the target delegitimization hook) and
17042 in ix86_find_base_term.  As a compile-time memory optimization, we
17043 avoid allocating rtxes that will not change the outcome for the
17044 callers (find_base_value and find_base_term).  */
17047 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
17049 rtx orig_x = delegitimize_mem_from_attrs (x);
17050 /* addend is NULL or some rtx if x is something+GOTOFF where
17051 something doesn't include the PIC register. */
17052 rtx addend = NULL_RTX;
17053 /* reg_addend is NULL or a multiple of some register. */
17054 rtx reg_addend = NULL_RTX;
17055 /* const_addend is NULL or a const_int. */
17056 rtx const_addend = NULL_RTX;
17057 /* This is the result, or NULL. */
17058 rtx result = NULL_RTX;
17067 if (GET_CODE (x) == CONST
17068 && GET_CODE (XEXP (x, 0)) == PLUS
17069 && GET_MODE (XEXP (x, 0)) == Pmode
17070 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
17071 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
17072 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
17074 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
17075 base. A CONST can't be arg_pointer_rtx based. */
17076 if (base_term_p && MEM_P (orig_x))
17078 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
17079 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
17080 if (MEM_P (orig_x))
17081 x = replace_equiv_address_nv (orig_x, x);
17085 if (GET_CODE (x) == CONST
17086 && GET_CODE (XEXP (x, 0)) == UNSPEC
17087 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
17088 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
17089 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
17091 x = XVECEXP (XEXP (x, 0), 0, 0);
17092 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
17094 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
17101 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
17102 return ix86_delegitimize_tls_address (orig_x);
17104 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
17105 and -mcmodel=medium -fpic. */
17108 if (GET_CODE (x) != PLUS
17109 || GET_CODE (XEXP (x, 1)) != CONST)
17110 return ix86_delegitimize_tls_address (orig_x);
17112 if (ix86_pic_register_p (XEXP (x, 0)))
17113 /* %ebx + GOT/GOTOFF */
17115 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17117 /* %ebx + %reg * scale + GOT/GOTOFF */
17118 reg_addend = XEXP (x, 0);
17119 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
17120 reg_addend = XEXP (reg_addend, 1);
17121 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
17122 reg_addend = XEXP (reg_addend, 0);
17125 reg_addend = NULL_RTX;
17126 addend = XEXP (x, 0);
17130 addend = XEXP (x, 0);
17132 x = XEXP (XEXP (x, 1), 0);
17133 if (GET_CODE (x) == PLUS
17134 && CONST_INT_P (XEXP (x, 1)))
17136 const_addend = XEXP (x, 1);
17140 if (GET_CODE (x) == UNSPEC
17141 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
17142 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
17143 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
17144 && !MEM_P (orig_x) && !addend)))
17145 result = XVECEXP (x, 0, 0);
17147 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
17148 && !MEM_P (orig_x))
17149 result = XVECEXP (x, 0, 0);
17152 return ix86_delegitimize_tls_address (orig_x);
17154 /* For (PLUS something CONST_INT) both find_base_{value,term} just
17155 recurse on the first operand. */
17156 if (const_addend && !base_term_p)
17157 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
17159 result = gen_rtx_PLUS (Pmode, reg_addend, result);
17162 /* If the rest of original X doesn't involve the PIC register, add
17163 addend and subtract pic_offset_table_rtx. This can happen e.g.
17165 leal (%ebx, %ecx, 4), %ecx
17167 movl foo@GOTOFF(%ecx), %edx
17168 in which case we return (%ecx - %ebx) + foo
17169 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
17170 and reload has completed. Don't do the latter for debug,
17171 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
17172 if (pic_offset_table_rtx
17173 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
17174 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
17175 pic_offset_table_rtx),
17177 else if (base_term_p
17178 && pic_offset_table_rtx
17180 && !TARGET_VXWORKS_RTP)
17182 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
17183 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
17184 result = gen_rtx_PLUS (Pmode, tmp, result);
17189 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
17191 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
17192 if (result == NULL_RTX)
17198 /* The normal instantiation of the above template. */
17201 ix86_delegitimize_address (rtx x)
17203 return ix86_delegitimize_address_1 (x, false);
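/* A minimal example of the common -m32 case: the legitimized address

     (plus (reg:SI ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   delegitimizes back to plain (symbol_ref "foo"), with any constant or
   register addend re-applied around the symbol.  */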
17206 /* If X is a machine specific address (i.e. a symbol or label being
17207 referenced as a displacement from the GOT implemented using an
17208 UNSPEC), then return the base term. Otherwise return X. */
17211 ix86_find_base_term (rtx x)
17217 if (GET_CODE (x) != CONST)
17219 term = XEXP (x, 0);
17220 if (GET_CODE (term) == PLUS
17221 && CONST_INT_P (XEXP (term, 1)))
17222 term = XEXP (term, 0);
17223 if (GET_CODE (term) != UNSPEC
17224 || (XINT (term, 1) != UNSPEC_GOTPCREL
17225 && XINT (term, 1) != UNSPEC_PCREL))
17228 return XVECEXP (term, 0, 0);
17231 return ix86_delegitimize_address_1 (x, true);
17234 /* Return true if X shouldn't be emitted into the debug info.
17235 Disallow UNSPECs other than @gotoff - we can't emit the
17236 _GLOBAL_OFFSET_TABLE_ symbol easily into the .debug_info section, so
17237 we do not delegitimize, but instead assemble it as @gotoff.
17238 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
17239 assembles that as a _GLOBAL_OFFSET_TABLE_-. expression.  */
17242 ix86_const_not_ok_for_debug_p (rtx x)
17244 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
17247 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
17254 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
17255 bool fp, FILE *file)
17257 const char *suffix;
17259 if (mode == CCFPmode)
17261 code = ix86_fp_compare_code_to_integer (code);
17265 code = reverse_condition (code);
17270 gcc_assert (mode != CCGZmode);
17294 gcc_assert (mode != CCGZmode);
17318 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
17322 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
17323 Those same assemblers have the same but opposite lossage on cmov. */
17324 if (mode == CCmode)
17325 suffix = fp ? "nbe" : "a";
17327 gcc_unreachable ();
17344 gcc_unreachable ();
17348 if (mode == CCmode || mode == CCGZmode)
17350 else if (mode == CCCmode)
17351 suffix = fp ? "b" : "c";
17353 gcc_unreachable ();
17370 gcc_unreachable ();
17374 if (mode == CCmode || mode == CCGZmode)
17376 else if (mode == CCCmode)
17377 suffix = fp ? "nb" : "nc";
17379 gcc_unreachable ();
17382 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
17386 if (mode == CCmode)
17389 gcc_unreachable ();
17392 suffix = fp ? "u" : "p";
17395 suffix = fp ? "nu" : "np";
17398 gcc_unreachable ();
17400 fputs (suffix, file);
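/* For example, a signed (gt ...) prints as "g", so an output template
   containing "set%C0" produces "setg"; an unsigned (gtu ...) in CCmode
   prints as "a" ("nbe" in the fp/fcmov form, per the lossage note
   above), and UNORDERED prints as "p" ("u" for fp).  */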
17403 /* Print the name of register X to FILE based on its machine mode and number.
17404 If CODE is 'w', pretend the mode is HImode.
17405 If CODE is 'b', pretend the mode is QImode.
17406 If CODE is 'k', pretend the mode is SImode.
17407 If CODE is 'q', pretend the mode is DImode.
17408 If CODE is 'x', pretend the mode is V4SFmode.
17409 If CODE is 't', pretend the mode is V8SFmode.
17410 If CODE is 'g', pretend the mode is V16SFmode.
17411 If CODE is 'h', pretend the reg is the 'high' byte register.
17412 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack operand.
17413 If CODE is 'd', duplicate the operand for AVX instruction.
17414 If CODE is 'V', print naked full integer register name without %.
17418 print_reg (rtx x, int code, FILE *file)
17422 unsigned int regno;
17425 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
17430 gcc_assert (TARGET_64BIT);
17431 fputs ("rip", file);
17435 if (code == 'y' && STACK_TOP_P (x))
17437 fputs ("st(0)", file);
17443 else if (code == 'b')
17445 else if (code == 'k')
17447 else if (code == 'q')
17449 else if (code == 'h')
17451 else if (code == 'x')
17453 else if (code == 't')
17455 else if (code == 'g')
17458 msize = GET_MODE_SIZE (GET_MODE (x));
17462 if (regno == ARG_POINTER_REGNUM
17463 || regno == FRAME_POINTER_REGNUM
17464 || regno == FPSR_REG)
17466 output_operand_lossage
17467 ("invalid use of register '%s'", reg_names[regno]);
17470 else if (regno == FLAGS_REG)
17472 output_operand_lossage ("invalid use of asm flag output");
17478 if (GENERAL_REGNO_P (regno))
17479 msize = GET_MODE_SIZE (word_mode);
17481 error ("'V' modifier on non-integer register");
17484 duplicated = code == 'd' && TARGET_AVX;
17491 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
17492 warning (0, "unsupported size for integer register");
17495 if (LEGACY_INT_REGNO_P (regno))
17496 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
17500 reg = hi_reg_name[regno];
17503 if (regno >= ARRAY_SIZE (qi_reg_name))
17505 if (!ANY_QI_REGNO_P (regno))
17506 error ("unsupported size for integer register");
17507 reg = qi_reg_name[regno];
17510 if (regno >= ARRAY_SIZE (qi_high_reg_name))
17512 reg = qi_high_reg_name[regno];
17516 if (SSE_REGNO_P (regno))
17518 gcc_assert (!duplicated);
17519 putc (msize == 32 ? 'y' : 'z', file);
17520 reg = hi_reg_name[regno] + 1;
17525 gcc_unreachable ();
17530 /* Irritatingly, AMD extended registers use
17531 a different naming convention: "r%d[bwd]"
17532 if (REX_INT_REGNO_P (regno))
17534 gcc_assert (TARGET_64BIT);
17538 error ("extended registers have no high halves");
17553 error ("unsupported operand size for extended register");
17561 if (ASSEMBLER_DIALECT == ASM_ATT)
17562 fprintf (file, ", %%%s", reg);
17564 fprintf (file, ", %s", reg);
17568 /* Meaning of CODE:
17569 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
17570 C -- print opcode suffix for set/cmov insn.
17571 c -- like C, but print reversed condition
17572 F,f -- likewise, but for floating-point.
17573 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
17575 R -- print embedded rounding and sae.
17576 r -- print only sae.
17577 z -- print the opcode suffix for the size of the current operand.
17578 Z -- likewise, with special suffixes for x87 instructions.
17579 * -- print a star (in certain assembler syntax)
17580 A -- print an absolute memory reference.
17581 E -- print address with DImode register names if TARGET_64BIT.
17582 w -- print the operand as if it's a "word" (HImode) even if it isn't.
17583 s -- print a shift double count, followed by the assembler's argument
17585 b -- print the QImode name of the register for the indicated operand.
17586 %b0 would print %al if operands[0] is reg 0.
17587 w -- likewise, print the HImode name of the register.
17588 k -- likewise, print the SImode name of the register.
17589 q -- likewise, print the DImode name of the register.
17590 x -- likewise, print the V4SFmode name of the register.
17591 t -- likewise, print the V8SFmode name of the register.
17592 g -- likewise, print the V16SFmode name of the register.
17593 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
17594 y -- print "st(0)" instead of "st" as a register.
17595 d -- print duplicated register operand for AVX instruction.
17596 D -- print condition for SSE cmp instruction.
17597 P -- if PIC, print an @PLT suffix.
17598 p -- print raw symbol name.
17599 X -- don't print any sort of PIC '@' suffix for a symbol.
17600 & -- print some in-use local-dynamic symbol name.
17601 H -- print a memory address offset by 8; used for sse high-parts
17602 Y -- print condition for XOP pcom* instruction.
17603 V -- print naked full integer register name without %.
17604 + -- print a branch hint as 'cs' or 'ds' prefix
17605 ; -- print a semicolon (after prefixes due to bug in older gas).
17606 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
17607 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
17608 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
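/* A couple of concrete uses of the codes above: "%k1" prints the
   SImode name of operands[1] (e.g. "%ecx" for reg CX), and on an AVX
   target "%d1" prints the operand twice, e.g. "%xmm2, %xmm2", to
   build the 3-operand form from a 2-operand template.  */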
17612 ix86_print_operand (FILE *file, rtx x, int code)
17619 switch (ASSEMBLER_DIALECT)
17626 /* Intel syntax. For absolute addresses, registers should not
17627 be surrounded by braces. */
17631 ix86_print_operand (file, x, 0);
17638 gcc_unreachable ();
17641 ix86_print_operand (file, x, 0);
17645 /* Wrap address in an UNSPEC to declare special handling. */
17647 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
17649 output_address (VOIDmode, x);
17653 if (ASSEMBLER_DIALECT == ASM_ATT)
17658 if (ASSEMBLER_DIALECT == ASM_ATT)
17663 if (ASSEMBLER_DIALECT == ASM_ATT)
17668 if (ASSEMBLER_DIALECT == ASM_ATT)
17673 if (ASSEMBLER_DIALECT == ASM_ATT)
17678 if (ASSEMBLER_DIALECT == ASM_ATT)
17683 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17684 if (ASSEMBLER_DIALECT != ASM_ATT)
17687 switch (GET_MODE_SIZE (GET_MODE (x)))
17702 output_operand_lossage ("invalid operand size for operand "
17712 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
17714 /* Opcodes don't get size suffixes if using Intel opcodes. */
17715 if (ASSEMBLER_DIALECT == ASM_INTEL)
17718 switch (GET_MODE_SIZE (GET_MODE (x)))
17737 output_operand_lossage ("invalid operand size for operand "
17743 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
17744 warning (0, "non-integer operand used with operand code 'z'");
17748 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
17749 if (ASSEMBLER_DIALECT == ASM_INTEL)
17752 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
17754 switch (GET_MODE_SIZE (GET_MODE (x)))
17757 #ifdef HAVE_AS_IX86_FILDS
17767 #ifdef HAVE_AS_IX86_FILDQ
17770 fputs ("ll", file);
17778 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
17780 /* 387 opcodes don't get size suffixes
17781 if the operands are registers. */
17782 if (STACK_REG_P (x))
17785 switch (GET_MODE_SIZE (GET_MODE (x)))
17806 output_operand_lossage ("invalid operand type used with "
17807 "operand code 'Z'");
17811 output_operand_lossage ("invalid operand size for operand code 'Z'");
17831 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
17833 ix86_print_operand (file, x, 0);
17834 fputs (", ", file);
17839 switch (GET_CODE (x))
17842 fputs ("neq", file);
17845 fputs ("eq", file);
17849 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
17853 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
17857 fputs ("le", file);
17861 fputs ("lt", file);
17864 fputs ("unord", file);
17867 fputs ("ord", file);
17870 fputs ("ueq", file);
17873 fputs ("nlt", file);
17876 fputs ("nle", file);
17879 fputs ("ule", file);
17882 fputs ("ult", file);
17885 fputs ("une", file);
17888 output_operand_lossage ("operand is not a condition code, "
17889 "invalid operand code 'Y'");
17895 /* A little bit of brain damage here: the SSE compare instructions
17896 use completely different names for the comparisons than the
17897 fp conditional moves do.  */
17898 switch (GET_CODE (x))
17903 fputs ("eq_us", file);
17908 fputs ("eq", file);
17913 fputs ("nge", file);
17918 fputs ("lt", file);
17923 fputs ("ngt", file);
17928 fputs ("le", file);
17931 fputs ("unord", file);
17936 fputs ("neq_oq", file);
17941 fputs ("neq", file);
17946 fputs ("ge", file);
17951 fputs ("nlt", file);
17956 fputs ("gt", file);
17961 fputs ("nle", file);
17964 fputs ("ord", file);
17967 output_operand_lossage ("operand is not a condition code, "
17968 "invalid operand code 'D'");
17975 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17976 if (ASSEMBLER_DIALECT == ASM_ATT)
17978 gcc_fallthrough ();
17983 if (!COMPARISON_P (x))
17985 output_operand_lossage ("operand is not a condition code, "
17986 "invalid operand code '%c'", code);
17989 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
17990 code == 'c' || code == 'f',
17991 code == 'F' || code == 'f',
17996 if (!offsettable_memref_p (x))
17998 output_operand_lossage ("operand is not an offsettable memory "
17999 "reference, invalid operand code 'H'");
18002 /* It doesn't actually matter what mode we use here, as we're
18003 only going to use this for printing. */
18004 x = adjust_address_nv (x, DImode, 8);
18005 /* Output 'qword ptr' for intel assembler dialect. */
18006 if (ASSEMBLER_DIALECT == ASM_INTEL)
18011 if (!CONST_INT_P (x))
18013 output_operand_lossage ("operand is not an integer, invalid "
18014 "operand code 'K'");
18018 if (INTVAL (x) & IX86_HLE_ACQUIRE)
18019 #ifdef HAVE_AS_IX86_HLE
18020 fputs ("xacquire ", file);
18022 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
18024 else if (INTVAL (x) & IX86_HLE_RELEASE)
18025 #ifdef HAVE_AS_IX86_HLE
18026 fputs ("xrelease ", file);
18028 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
18030 /* We do not want to print the value of the operand.  */
18034 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
18035 fputs ("{z}", file);
18039 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
18041 output_operand_lossage ("operand is not a specific integer, "
18042 "invalid operand code 'r'");
18046 if (ASSEMBLER_DIALECT == ASM_INTEL)
18047 fputs (", ", file);
18049 fputs ("{sae}", file);
18051 if (ASSEMBLER_DIALECT == ASM_ATT)
18052 fputs (", ", file);
18057 if (!CONST_INT_P (x))
18059 output_operand_lossage ("operand is not an integer, invalid "
18060 "operand code 'R'");
18064 if (ASSEMBLER_DIALECT == ASM_INTEL)
18065 fputs (", ", file);
18067 switch (INTVAL (x))
18069 case ROUND_NEAREST_INT | ROUND_SAE:
18070 fputs ("{rn-sae}", file);
18072 case ROUND_NEG_INF | ROUND_SAE:
18073 fputs ("{rd-sae}", file);
18075 case ROUND_POS_INF | ROUND_SAE:
18076 fputs ("{ru-sae}", file);
18078 case ROUND_ZERO | ROUND_SAE:
18079 fputs ("{rz-sae}", file);
18082 output_operand_lossage ("operand is not a specific integer, "
18083 "invalid operand code 'R'");
18086 if (ASSEMBLER_DIALECT == ASM_ATT)
18087 fputs (", ", file);
18092 if (ASSEMBLER_DIALECT == ASM_ATT)
18098 const char *name = get_some_local_dynamic_name ();
18100 output_operand_lossage ("'%%&' used without any "
18101 "local dynamic TLS references");
18103 assemble_name (file, name);
18112 || optimize_function_for_size_p (cfun)
18113 || !TARGET_BRANCH_PREDICTION_HINTS)
18116 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
18119 int pred_val = profile_probability::from_reg_br_prob_note
18120 (XINT (x, 0)).to_reg_br_prob_base ();
18122 if (pred_val < REG_BR_PROB_BASE * 45 / 100
18123 || pred_val > REG_BR_PROB_BASE * 55 / 100)
18125 bool taken = pred_val > REG_BR_PROB_BASE / 2;
18127 = final_forward_branch_p (current_output_insn) == 0;
18129 /* Emit hints only when the default branch prediction
18130 heuristics would fail.  */
18131 if (taken != cputaken)
18133 /* We use 3e (DS) prefix for taken branches and
18134 2e (CS) prefix for not taken branches. */
18136 fputs ("ds ; ", file);
18138 fputs ("cs ; ", file);
18146 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
18152 putc (TARGET_AVX2 ? 'i' : 'f', file);
18156 if (TARGET_64BIT && Pmode != word_mode)
18157 fputs ("addr32 ", file);
18161 if (ix86_notrack_prefixed_insn_p (current_output_insn))
18162 fputs ("notrack ", file);
18166 output_operand_lossage ("invalid operand code '%c'", code);
18171 print_reg (x, code, file);
18173 else if (MEM_P (x))
18175 rtx addr = XEXP (x, 0);
18177 /* No `byte ptr' prefix for call instructions ... */
18178 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
18180 machine_mode mode = GET_MODE (x);
18183 /* Check for explicit size override codes. */
18186 else if (code == 'w')
18188 else if (code == 'k')
18190 else if (code == 'q')
18192 else if (code == 'x')
18194 else if (code == 't')
18196 else if (code == 'g')
18198 else if (mode == BLKmode)
18199 /* ... or BLKmode operands, when not overridden. */
18202 switch (GET_MODE_SIZE (mode))
18204 case 1: size = "BYTE"; break;
18205 case 2: size = "WORD"; break;
18206 case 4: size = "DWORD"; break;
18207 case 8: size = "QWORD"; break;
18208 case 12: size = "TBYTE"; break;
18210 if (mode == XFmode)
18215 case 32: size = "YMMWORD"; break;
18216 case 64: size = "ZMMWORD"; break;
18218 gcc_unreachable ();
18222 fputs (size, file);
18223 fputs (" PTR ", file);
18227 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
18228 output_operand_lossage ("invalid constraints for operand");
18230 ix86_print_operand_address_as
18231 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
18234 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
18238 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
18240 if (ASSEMBLER_DIALECT == ASM_ATT)
18242 /* Sign extend 32bit SFmode immediate to 8 bytes. */
18244 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
18245 (unsigned long long) (int) l);
18247 fprintf (file, "0x%08x", (unsigned int) l);
18250 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
18254 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
18256 if (ASSEMBLER_DIALECT == ASM_ATT)
18258 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
18261 /* These float cases don't actually occur as immediate operands. */
18262 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
18266 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
18267 fputs (dstr, file);
18272 /* We have patterns that allow zero sets of memory, for instance.
18273 In 64-bit mode, we should probably support all 8-byte vectors,
18274 since we can in fact encode that into an immediate. */
18275 if (GET_CODE (x) == CONST_VECTOR)
18277 if (x != CONST0_RTX (GET_MODE (x)))
18278 output_operand_lossage ("invalid vector immediate");
18282 if (code != 'P' && code != 'p')
18284 if (CONST_INT_P (x))
18286 if (ASSEMBLER_DIALECT == ASM_ATT)
18289 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
18290 || GET_CODE (x) == LABEL_REF)
18292 if (ASSEMBLER_DIALECT == ASM_ATT)
18295 fputs ("OFFSET FLAT:", file);
18298 if (CONST_INT_P (x))
18299 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
18300 else if (flag_pic || MACHOPIC_INDIRECT)
18301 output_pic_addr_const (file, x, code);
18303 output_addr_const (file, x);
18308 ix86_print_operand_punct_valid_p (unsigned char code)
18310 return (code == '*' || code == '+' || code == '&' || code == ';'
18311 || code == '~' || code == '^' || code == '!');
18314 /* Print a memory operand whose address is ADDR. */
18317 ix86_print_operand_address_as (FILE *file, rtx addr,
18318 addr_space_t as, bool no_rip)
18320 struct ix86_address parts;
18321 rtx base, index, disp;
18327 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
18329 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18330 gcc_assert (parts.index == NULL_RTX);
18331 parts.index = XVECEXP (addr, 0, 1);
18332 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
18333 addr = XVECEXP (addr, 0, 0);
18336 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
18338 gcc_assert (TARGET_64BIT);
18339 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18343 ok = ix86_decompose_address (addr, &parts);
18348 index = parts.index;
18350 scale = parts.scale;
18352 if (ADDR_SPACE_GENERIC_P (as))
18355 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
18357 if (!ADDR_SPACE_GENERIC_P (as))
18359 if (ASSEMBLER_DIALECT == ASM_ATT)
18364 case ADDR_SPACE_SEG_FS:
18365 fputs ("fs:", file);
18367 case ADDR_SPACE_SEG_GS:
18368 fputs ("gs:", file);
18371 gcc_unreachable ();
18375 /* Use one byte shorter RIP relative addressing for 64bit mode. */
18376 if (TARGET_64BIT && !base && !index && !no_rip)
18380 if (GET_CODE (disp) == CONST
18381 && GET_CODE (XEXP (disp, 0)) == PLUS
18382 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18383 symbol = XEXP (XEXP (disp, 0), 0);
18385 if (GET_CODE (symbol) == LABEL_REF
18386 || (GET_CODE (symbol) == SYMBOL_REF
18387 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
18391 if (!base && !index)
18393 /* A displacement-only address requires special attention.  */
18394 if (CONST_INT_P (disp))
18396 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
18397 fputs ("ds:", file);
18398 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
18400 /* Load the external function address via the GOT slot to avoid PLT. */
18401 else if (GET_CODE (disp) == CONST
18402 && GET_CODE (XEXP (disp, 0)) == UNSPEC
18403 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
18404 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
18405 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
18406 output_pic_addr_const (file, disp, 0);
18408 output_pic_addr_const (file, disp, 0);
18410 output_addr_const (file, disp);
18414 /* Print SImode register names to force addr32 prefix. */
18415 if (SImode_address_operand (addr, VOIDmode))
18419 gcc_assert (TARGET_64BIT);
18420 switch (GET_CODE (addr))
18423 gcc_assert (GET_MODE (addr) == SImode);
18424 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
18428 gcc_assert (GET_MODE (addr) == DImode);
18431 gcc_unreachable ();
18434 gcc_assert (!code);
18440 && CONST_INT_P (disp)
18441 && INTVAL (disp) < -16*1024*1024)
18443 /* X32 runs in 64-bit mode, where displacement, DISP, in
18444 address DISP(%r64), is encoded as 32-bit immediate sign-
18445 extended from 32-bit to 64-bit. For -0x40000300(%r64),
18446 address is %r64 + 0xffffffffbffffd00. When %r64 <
18447 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
18448 which is invalid for x32. The correct address is %r64
18449 - 0x40000300 == 0xf7ffdd64. To properly encode
18450 -0x40000300(%r64) for x32, we zero-extend negative
18451 displacement by forcing addr32 prefix which truncates
18452 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
18453 zero-extend all negative displacements, including -1(%rsp).
18454 However, for small negative displacements, sign-extension
18455 won't cause overflow. We only zero-extend negative
18456 displacements if they are < -16*1024*1024, which is also used
18457 to check legitimate address displacements for PIC. */
18461 /* Since the upper 32 bits of RSP are always zero for x32,
18462 we can encode %esp as %rsp to avoid 0x67 prefix if
18463 there is no index register. */
18464 if (TARGET_X32 && Pmode == SImode
18465 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
18468 if (ASSEMBLER_DIALECT == ASM_ATT)
18473 output_pic_addr_const (file, disp, 0);
18474 else if (GET_CODE (disp) == LABEL_REF)
18475 output_asm_label (disp);
18477 output_addr_const (file, disp);
18482 print_reg (base, code, file);
18486 print_reg (index, vsib ? 0 : code, file);
18487 if (scale != 1 || vsib)
18488 fprintf (file, ",%d", scale);
18494 rtx offset = NULL_RTX;
18498 /* Pull out the offset of a symbol; print any symbol itself. */
18499 if (GET_CODE (disp) == CONST
18500 && GET_CODE (XEXP (disp, 0)) == PLUS
18501 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18503 offset = XEXP (XEXP (disp, 0), 1);
18504 disp = gen_rtx_CONST (VOIDmode,
18505 XEXP (XEXP (disp, 0), 0));
18509 output_pic_addr_const (file, disp, 0);
18510 else if (GET_CODE (disp) == LABEL_REF)
18511 output_asm_label (disp);
18512 else if (CONST_INT_P (disp))
18515 output_addr_const (file, disp);
18521 print_reg (base, code, file);
18524 if (INTVAL (offset) >= 0)
18526 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
18530 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
18537 print_reg (index, vsib ? 0 : code, file);
18538 if (scale != 1 || vsib)
18539 fprintf (file, "*%d", scale);
18547 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
18549 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
18552 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
18555 i386_asm_output_addr_const_extra (FILE *file, rtx x)
18559 if (GET_CODE (x) != UNSPEC)
18562 op = XVECEXP (x, 0, 0);
18563 switch (XINT (x, 1))
18565 case UNSPEC_GOTOFF:
18566 output_addr_const (file, op);
18567 fputs ("@gotoff", file);
18569 case UNSPEC_GOTTPOFF:
18570 output_addr_const (file, op);
18571 /* FIXME: This might be @TPOFF in Sun ld. */
18572 fputs ("@gottpoff", file);
18575 output_addr_const (file, op);
18576 fputs ("@tpoff", file);
18578 case UNSPEC_NTPOFF:
18579 output_addr_const (file, op);
18581 fputs ("@tpoff", file);
18583 fputs ("@ntpoff", file);
18585 case UNSPEC_DTPOFF:
18586 output_addr_const (file, op);
18587 fputs ("@dtpoff", file);
18589 case UNSPEC_GOTNTPOFF:
18590 output_addr_const (file, op);
18592 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
18593 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
18595 fputs ("@gotntpoff", file);
18597 case UNSPEC_INDNTPOFF:
18598 output_addr_const (file, op);
18599 fputs ("@indntpoff", file);
18602 case UNSPEC_MACHOPIC_OFFSET:
18603 output_addr_const (file, op);
18605 machopic_output_function_base_name (file);
18616 /* Split one or more double-mode RTL references into pairs of half-mode
18617 references. The RTL can be REG, offsettable MEM, integer constant, or
18618 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
18619 split and "num" is its length. lo_half and hi_half are output arrays
18620 that parallel "operands". */
18623 split_double_mode (machine_mode mode, rtx operands[],
18624 int num, rtx lo_half[], rtx hi_half[])
18626 machine_mode half_mode;
18632 half_mode = DImode;
18635 half_mode = SImode;
18638 gcc_unreachable ();
18641 byte = GET_MODE_SIZE (half_mode);
18645 rtx op = operands[num];
18647 /* simplify_subreg refuses to split volatile memory addresses,
18648 but we still have to handle them.  */
18651 lo_half[num] = adjust_address (op, half_mode, 0);
18652 hi_half[num] = adjust_address (op, half_mode, byte);
18656 lo_half[num] = simplify_gen_subreg (half_mode, op,
18657 GET_MODE (op) == VOIDmode
18658 ? mode : GET_MODE (op), 0);
18659 hi_half[num] = simplify_gen_subreg (half_mode, op,
18660 GET_MODE (op) == VOIDmode
18661 ? mode : GET_MODE (op), byte);
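/* E.g. splitting a DImode register pair on -m32 (half_mode == SImode,
   byte == 4) gives lo_half = (subreg:SI (reg:DI n) 0) and
   hi_half = (subreg:SI (reg:DI n) 4), while an offsettable MEM is
   split via adjust_address into two SImode MEMs at offsets 0 and 4.  */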
18666 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
18667 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
18668 is the expression of the binary operation. The output may either be
18669 emitted here, or returned to the caller, like all output_* functions.
18671 There is no guarantee that the operands are the same mode, as they
18672 might be within FLOAT or FLOAT_EXTEND expressions. */
18674 #ifndef SYSV386_COMPAT
18675 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
18676 wants to fix the assemblers because that causes incompatibility
18677 with gcc. No-one wants to fix gcc because that causes
18678 incompatibility with assemblers... You can use the option of
18679 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
18680 #define SYSV386_COMPAT 1
18684 output_387_binary_op (rtx_insn *insn, rtx *operands)
18686 static char buf[40];
18689 = (SSE_REG_P (operands[0])
18690 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
18694 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
18695 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
18702 switch (GET_CODE (operands[3]))
18713 gcc_unreachable ();
18720 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
18724 p = "\t{%2, %1, %0|%0, %1, %2}";
18726 p = "\t{%2, %0|%0, %2}";
18732 /* Even if we do not want to check the inputs, this documents the input
18733 constraints, which helps in understanding the following code.  */
18736 if (STACK_REG_P (operands[0])
18737 && ((REG_P (operands[1])
18738 && REGNO (operands[0]) == REGNO (operands[1])
18739 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
18740 || (REG_P (operands[2])
18741 && REGNO (operands[0]) == REGNO (operands[2])
18742 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
18743 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
18746 gcc_unreachable ();
18749 switch (GET_CODE (operands[3]))
18753 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
18754 std::swap (operands[1], operands[2]);
18756 /* Now we know operands[0] == operands[1].  */
18758 if (MEM_P (operands[2]))
18764 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
18766 if (STACK_TOP_P (operands[0]))
18767 /* How is it that we are storing to a dead operand[2]?
18768 Well, presumably operands[1] is dead too. We can't
18769 store the result to st(0) as st(0) gets popped on this
18770 instruction. Instead store to operands[2] (which I
18771 think has to be st(1)). st(1) will be popped later.
18772 gcc <= 2.8.1 didn't have this check and generated
18773 assembly code that the Unixware assembler rejected. */
18774 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
18776 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
18780 if (STACK_TOP_P (operands[0]))
18781 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
18783 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
18788 if (MEM_P (operands[1]))
18794 if (MEM_P (operands[2]))
18800 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
18803 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
18804 derived assemblers, confusingly reverse the direction of
18805 the operation for fsub{r} and fdiv{r} when the
18806 destination register is not st(0). The Intel assembler
18807 doesn't have this brain damage. Read !SYSV386_COMPAT to
18808 figure out what the hardware really does. */
18809 if (STACK_TOP_P (operands[0]))
18810 p = "{p\t%0, %2|rp\t%2, %0}";
18812 p = "{rp\t%2, %0|p\t%0, %2}";
18814 if (STACK_TOP_P (operands[0]))
18815 /* As above for fmul/fadd, we can't store to st(0). */
18816 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
18818 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
18823 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18826 if (STACK_TOP_P (operands[0]))
18827 p = "{rp\t%0, %1|p\t%1, %0}";
18829 p = "{p\t%1, %0|rp\t%0, %1}";
18831 if (STACK_TOP_P (operands[0]))
18832 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
18834 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
18839 if (STACK_TOP_P (operands[0]))
18841 if (STACK_TOP_P (operands[1]))
18842 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
18844 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
18847 else if (STACK_TOP_P (operands[1]))
18850 p = "{\t%1, %0|r\t%0, %1}";
18852 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
18858 p = "{r\t%2, %0|\t%0, %2}";
18860 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
18866 gcc_unreachable ();
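/* For example, with operands[0] == operands[1] == st(0) and a dying
   operands[2] == st(1), a PLUS yields the template
   "faddp\t{%0, %2|%2, %0}", i.e. "faddp %st, %st(1)" in ATT syntax:
   st(1) = st(0) + st(1), after which the stack is popped.  */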
18873 /* Return needed mode for entity in optimize_mode_switching pass. */
18876 ix86_dirflag_mode_needed (rtx_insn *insn)
18880 if (cfun->machine->func_type == TYPE_NORMAL)
18881 return X86_DIRFLAG_ANY;
18883 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
18884 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
18887 if (recog_memoized (insn) < 0)
18888 return X86_DIRFLAG_ANY;
18890 if (get_attr_type (insn) == TYPE_STR)
18892 /* Emit cld instruction if stringops are used in the function. */
18893 if (cfun->machine->func_type == TYPE_NORMAL)
18894 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
18896 return X86_DIRFLAG_RESET;
18899 return X86_DIRFLAG_ANY;
18902 /* Check if a 256bit or 512bit AVX register is referenced inside EXP.  */
18905 ix86_check_avx_upper_register (const_rtx exp)
18907 return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128;
18910 /* Return needed mode for entity in optimize_mode_switching pass. */
18913 ix86_avx_u128_mode_needed (rtx_insn *insn)
18919 /* Needed mode is set to AVX_U128_CLEAN if there are
18920 no 256bit or 512bit modes used in function arguments. */
18921 for (link = CALL_INSN_FUNCTION_USAGE (insn);
18923 link = XEXP (link, 1))
18925 if (GET_CODE (XEXP (link, 0)) == USE)
18927 rtx arg = XEXP (XEXP (link, 0), 0);
18929 if (ix86_check_avx_upper_register (arg))
18930 return AVX_U128_DIRTY;
18934 return AVX_U128_CLEAN;
18937 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
18938 Hardware changes state only when a 256bit register is written to,
18939 but we need to prevent the compiler from moving the optimal insertion
18940 point above an eventual read from a 256bit or 512bit register.  */
18941 subrtx_iterator::array_type array;
18942 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
18943 if (ix86_check_avx_upper_register (*iter))
18944 return AVX_U128_DIRTY;
18946 return AVX_U128_ANY;
18949 /* Return mode that i387 must be switched into
18950 prior to the execution of insn. */
18953 ix86_i387_mode_needed (int entity, rtx_insn *insn)
18955 enum attr_i387_cw mode;
18957 /* The mode UNINITIALIZED is used to store the control word after a
18958 function call or ASM pattern.  The mode ANY specifies that the function
18959 has no requirements on the control word and makes no changes in the
18960 bits we are interested in.  */
18963 || (NONJUMP_INSN_P (insn)
18964 && (asm_noperands (PATTERN (insn)) >= 0
18965 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
18966 return I387_CW_UNINITIALIZED;
18968 if (recog_memoized (insn) < 0)
18969 return I387_CW_ANY;
18971 mode = get_attr_i387_cw (insn);
18976 if (mode == I387_CW_TRUNC)
18981 if (mode == I387_CW_FLOOR)
18986 if (mode == I387_CW_CEIL)
18991 gcc_unreachable ();
18994 return I387_CW_ANY;
18997 /* Return mode that entity must be switched into
18998 prior to the execution of insn. */
19001 ix86_mode_needed (int entity, rtx_insn *insn)
19006 return ix86_dirflag_mode_needed (insn);
19008 return ix86_avx_u128_mode_needed (insn);
19012 return ix86_i387_mode_needed (entity, insn);
19014 gcc_unreachable ();
19019 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
19022 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
19024 if (ix86_check_avx_upper_register (dest))
19026 bool *used = (bool *) data;
19031 /* Calculate mode of upper 128bit AVX registers after the insn. */
19034 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
19036 rtx pat = PATTERN (insn);
19038 if (vzeroupper_pattern (pat, VOIDmode)
19039 || vzeroall_pattern (pat, VOIDmode))
19040 return AVX_U128_CLEAN;
19042 /* We know that the state is clean after a CALL insn if no 256bit
19043 or 512bit registers are used for the function return value.  */
19046 bool avx_upper_reg_found = false;
19047 note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found);
19049 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
19052 /* Otherwise, return current mode. Remember that if insn
19053 references AVX 256bit or 512bit registers, the mode was already
19054 changed to DIRTY from MODE_NEEDED. */
19058 /* Return the mode that an insn results in. */
19061 ix86_mode_after (int entity, int mode, rtx_insn *insn)
19068 return ix86_avx_u128_mode_after (mode, insn);
19074 gcc_unreachable ();
19079 ix86_dirflag_mode_entry (void)
19081 /* For TARGET_CLD or in an interrupt handler we can't assume the
19082 direction flag state at function entry.  */
19084 || cfun->machine->func_type != TYPE_NORMAL)
19085 return X86_DIRFLAG_ANY;
19087 return X86_DIRFLAG_RESET;
19091 ix86_avx_u128_mode_entry (void)
19095 /* Entry mode is set to AVX_U128_DIRTY if there are
19096 256bit or 512bit modes used in function arguments. */
19097 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
19098 arg = TREE_CHAIN (arg))
19100 rtx incoming = DECL_INCOMING_RTL (arg);
19102 if (incoming && ix86_check_avx_upper_register (incoming))
19103 return AVX_U128_DIRTY;
19106 return AVX_U128_CLEAN;
19109 /* Return a mode that ENTITY is assumed to be
19110 switched to at function entry. */
19113 ix86_mode_entry (int entity)
19118 return ix86_dirflag_mode_entry ();
19120 return ix86_avx_u128_mode_entry ();
19124 return I387_CW_ANY;
19126 gcc_unreachable ();
19131 ix86_avx_u128_mode_exit (void)
19133 rtx reg = crtl->return_rtx;
19135 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
19136 or 512bit modes used in the function return register.  */
19137 if (reg && ix86_check_avx_upper_register (reg))
19138 return AVX_U128_DIRTY;
19140 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
19141 modes used in function arguments, otherwise return AVX_U128_CLEAN.
19143 return ix86_avx_u128_mode_entry ();
19146 /* Return a mode that ENTITY is assumed to be
19147 switched to at function exit. */
19150 ix86_mode_exit (int entity)
19155 return X86_DIRFLAG_ANY;
19157 return ix86_avx_u128_mode_exit ();
19161 return I387_CW_ANY;
19163 gcc_unreachable ();
19168 ix86_mode_priority (int, int n)
19173 /* Output code to initialize control word copies used by trunc?f?i and
19174 rounding patterns.  CURRENT_MODE is set to the current control word,
19175 while NEW_MODE is set to the new control word.  */
19178 emit_i387_cw_initialization (int mode)
19180 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
19183 enum ix86_stack_slot slot;
19185 rtx reg = gen_reg_rtx (HImode);
19187 emit_insn (gen_x86_fnstcw_1 (stored_mode));
19188 emit_move_insn (reg, copy_rtx (stored_mode));
19192 case I387_CW_TRUNC:
19193 /* round toward zero (truncate) */
19194 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
19195 slot = SLOT_CW_TRUNC;
19198 case I387_CW_FLOOR:
19199 /* round down toward -oo */
19200 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
19201 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
19202 slot = SLOT_CW_FLOOR;
19206 /* round up toward +oo */
19207 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
19208 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
19209 slot = SLOT_CW_CEIL;
19213 gcc_unreachable ();
19216 gcc_assert (slot < MAX_386_STACK_LOCALS);
19218 new_mode = assign_386_stack_local (HImode, slot);
19219 emit_move_insn (new_mode, reg);
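/* The magic numbers above program the rounding-control (RC) field,
   bits 10-11 of the x87 control word: or-ing in 0x0c00 sets RC = 11
   (round toward zero/truncate), while clearing the field with ~0x0c00
   and or-ing 0x0400 or 0x0800 selects RC = 01 (round down) or
   RC = 10 (round up), respectively.  */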
19222 /* Generate one or more insns to set ENTITY to MODE. */
19225 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
19226 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
19231 if (mode == X86_DIRFLAG_RESET)
19232 emit_insn (gen_cld ());
19235 if (mode == AVX_U128_CLEAN)
19236 emit_insn (gen_avx_vzeroupper ());
19241 if (mode != I387_CW_ANY
19242 && mode != I387_CW_UNINITIALIZED)
19243 emit_i387_cw_initialization (mode);
19246 gcc_unreachable ();
19250 /* Output code for INSN to convert a float to a signed int. OPERANDS
19251 are the insn operands. The output may be [HSD]Imode and the input
19252 operand may be [SDX]Fmode. */
19255 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
19257 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19258 bool dimode_p = GET_MODE (operands[0]) == DImode;
19259 int round_mode = get_attr_i387_cw (insn);
19261 static char buf[40];
19264 /* Jump through a hoop or two for DImode, since the hardware has no
19265 non-popping instruction. We used to do this a different way, but
19266 that was somewhat fragile and broke with post-reload splitters. */
19267 if ((dimode_p || fisttp) && !stack_top_dies)
19268 output_asm_insn ("fld\t%y1", operands);
19270 gcc_assert (STACK_TOP_P (operands[1]));
19271 gcc_assert (MEM_P (operands[0]));
19272 gcc_assert (GET_MODE (operands[1]) != TFmode);
19275 return "fisttp%Z0\t%0";
19277 strcpy (buf, "fist");
19279 if (round_mode != I387_CW_ANY)
19280 output_asm_insn ("fldcw\t%3", operands);
19283 strcat (buf, p + !(stack_top_dies || dimode_p));
19285 output_asm_insn (buf, operands);
19287 if (round_mode != I387_CW_ANY)
19288 output_asm_insn ("fldcw\t%2", operands);
19293 /* Output code for x87 ffreep insn. The OPNO argument, which may only
19294 have the values zero or one, indicates the ffreep insn's operand
19295 from the OPERANDS array. */
19297 static const char *
19298 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
19300 if (TARGET_USE_FFREEP)
19301 #ifdef HAVE_AS_IX86_FFREEP
19302 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
19305 static char retval[32];
19306 int regno = REGNO (operands[opno]);
19308 gcc_assert (STACK_REGNO_P (regno));
19310 regno -= FIRST_STACK_REG;
19312 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
19317 return opno ? "fstp\t%y1" : "fstp\t%y0";
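/* The raw-encoding fallback works because "ffreep %st(i)" is the
   two-byte opcode 0xdf 0xc0+i; ASM_SHORT emits the pair little-endian,
   so e.g. regno 1 produces ".short 0xc1df", i.e. the bytes 0xdf 0xc1.  */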
19321 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
19322 should be used. UNORDERED_P is true when fucom should be used. */
19325 output_fp_compare (rtx_insn *insn, rtx *operands,
19326 bool eflags_p, bool unordered_p)
19328 rtx *xops = eflags_p ? &operands[0] : &operands[1];
19329 bool stack_top_dies;
19331 static char buf[40];
19334 gcc_assert (STACK_TOP_P (xops[0]));
19336 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19340 p = unordered_p ? "fucomi" : "fcomi";
19343 p = "p\t{%y1, %0|%0, %y1}";
19344 strcat (buf, p + !stack_top_dies);
19349 if (STACK_REG_P (xops[1])
19351 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
19353 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
19355 /* If the top of the 387 stack dies, and the other operand
19356 is also a stack register that dies, then this must be a
19357 `fcompp' float compare. */
19358 p = unordered_p ? "fucompp" : "fcompp";
19361 else if (const0_operand (xops[1], VOIDmode))
19363 gcc_assert (!unordered_p);
19364 strcpy (buf, "ftst");
19368 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
19370 gcc_assert (!unordered_p);
19374 p = unordered_p ? "fucom" : "fcom";
19379 strcat (buf, p + !stack_top_dies);
19382 output_asm_insn (buf, operands);
19383 return "fnstsw\t%0";
19387 ix86_output_addr_vec_elt (FILE *file, int value)
19389 const char *directive = ASM_LONG;
19393 directive = ASM_QUAD;
19395 gcc_assert (!TARGET_64BIT);
19398 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
19402 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
19404 const char *directive = ASM_LONG;
19407 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
19408 directive = ASM_QUAD;
19410 gcc_assert (!TARGET_64BIT);
19412 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
19413 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
19414 fprintf (file, "%s%s%d-%s%d\n",
19415 directive, LPREFIX, value, LPREFIX, rel);
19417 else if (TARGET_MACHO)
19419 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
19420 machopic_output_function_base_name (file);
19424 else if (HAVE_AS_GOTOFF_IN_DATA)
19425 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
19427 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
19428 GOT_SYMBOL_NAME, LPREFIX, value);
19431 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
19435 ix86_expand_clear (rtx dest)
19439 /* We play register width games, which are only valid after reload. */
19440 gcc_assert (reload_completed);
19442 /* Avoid HImode and its attendant prefix byte. */
19443 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
19444 dest = gen_rtx_REG (SImode, REGNO (dest));
19445 tmp = gen_rtx_SET (dest, const0_rtx);
19447 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
19449 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19450 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
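/* E.g. clearing %eax expands to "xorl %eax, %eax"; since XOR clobbers
   EFLAGS, the set is wrapped in a PARALLEL with a flags clobber so the
   optimizers know the flags are dead across it.  Only when
   TARGET_USE_MOV0 is set (and we are not optimizing for size) is a
   plain "mov $0, reg" emitted instead, without the clobber.  */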
19457 ix86_expand_move (machine_mode mode, rtx operands[])
19460 rtx tmp, addend = NULL_RTX;
19461 enum tls_model model;
19466 switch (GET_CODE (op1))
19469 tmp = XEXP (op1, 0);
19471 if (GET_CODE (tmp) != PLUS
19472 || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF)
19475 op1 = XEXP (tmp, 0);
19476 addend = XEXP (tmp, 1);
19480 model = SYMBOL_REF_TLS_MODEL (op1);
19483 op1 = legitimize_tls_address (op1, model, true);
19484 else if (ix86_force_load_from_GOT_p (op1))
19486 /* Load the external function address via GOT slot to avoid PLT. */
19487 op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
19491 op1 = gen_rtx_CONST (Pmode, op1);
19492 op1 = gen_const_mem (Pmode, op1);
19493 set_mem_alias_set (op1, ix86_GOT_alias_set ());
19497 tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX);
19513 op1 = force_operand (op1, NULL_RTX);
19514 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
19515 op0, 1, OPTAB_DIRECT);
19518 op1 = force_operand (op1, op0);
19523 op1 = convert_to_mode (mode, op1, 1);
19529 if ((flag_pic || MACHOPIC_INDIRECT)
19530 && symbolic_operand (op1, mode))
19532 if (TARGET_MACHO && !TARGET_64BIT)
19535 /* dynamic-no-pic */
19536 if (MACHOPIC_INDIRECT)
19538 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
19539 ? op0 : gen_reg_rtx (Pmode);
19540 op1 = machopic_indirect_data_reference (op1, temp);
19542 op1 = machopic_legitimize_pic_address (op1, mode,
19543 temp == op1 ? 0 : temp);
19545 if (op0 != op1 && GET_CODE (op0) != MEM)
19547 rtx insn = gen_rtx_SET (op0, op1);
19551 if (GET_CODE (op0) == MEM)
19552 op1 = force_reg (Pmode, op1);
19556 if (GET_CODE (temp) != REG)
19557 temp = gen_reg_rtx (Pmode);
19558 temp = legitimize_pic_address (op1, temp);
19563 /* dynamic-no-pic */
19569 op1 = force_reg (mode, op1);
19570 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
19572 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
19573 op1 = legitimize_pic_address (op1, reg);
19576 op1 = convert_to_mode (mode, op1, 1);
19583 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
19584 || !push_operand (op0, mode))
19586 op1 = force_reg (mode, op1);
19588 if (push_operand (op0, mode)
19589 && ! general_no_elim_operand (op1, mode))
19590 op1 = copy_to_mode_reg (mode, op1);
19592 /* Force large constants in 64bit compilation into a register
19593 to get them CSEed. */
19594 if (can_create_pseudo_p ()
19595 && (mode == DImode) && TARGET_64BIT
19596 && immediate_operand (op1, mode)
19597 && !x86_64_zext_immediate_operand (op1, VOIDmode)
19598 && !register_operand (op0, mode)
19600 op1 = copy_to_mode_reg (mode, op1);
19602 if (can_create_pseudo_p ()
19603 && CONST_DOUBLE_P (op1))
19605 /* If we are loading a floating point constant to a register,
19606 force the value to memory now, since we'll get better code
19607 out the back end. */
19609 op1 = validize_mem (force_const_mem (mode, op1));
19610 if (!register_operand (op0, mode))
19612 rtx temp = gen_reg_rtx (mode);
19613 emit_insn (gen_rtx_SET (temp, op1));
19614 emit_move_insn (op0, temp);
19620 emit_insn (gen_rtx_SET (op0, op1));
19624 ix86_expand_vector_move (machine_mode mode, rtx operands[])
19626 rtx op0 = operands[0], op1 = operands[1];
19627 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for the IA MCU
19628 psABI, since its biggest alignment is only 4 bytes.  */
19629 unsigned int align = (TARGET_IAMCU
19630 ? GET_MODE_BITSIZE (mode)
19631 : GET_MODE_ALIGNMENT (mode));
19633 if (push_operand (op0, VOIDmode))
19634 op0 = emit_move_resolve_push (mode, op0);
19636 /* Force constants other than zero into memory. We do not know how
19637 the instructions used to build constants modify the upper 64 bits
19638 of the register; once we have that information we may be able
19639 to handle some of them more efficiently.  */
19640 if (can_create_pseudo_p ()
19641 && (CONSTANT_P (op1)
19643 && CONSTANT_P (SUBREG_REG (op1))))
19644 && ((register_operand (op0, mode)
19645 && !standard_sse_constant_p (op1, mode))
19646 /* ix86_expand_vector_move_misalign() does not like constants. */
19647 || (SSE_REG_MODE_P (mode)
19649 && MEM_ALIGN (op0) < align)))
19651 if (SUBREG_P (op1))
19653 machine_mode imode = GET_MODE (SUBREG_REG (op1));
19654 rtx r = force_const_mem (imode, SUBREG_REG (op1));
19656 r = validize_mem (r);
19658 r = force_reg (imode, SUBREG_REG (op1));
19659 op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1));
19662 op1 = validize_mem (force_const_mem (mode, op1));
19665 /* We need to check memory alignment for SSE modes since an attribute
19666 can make operands unaligned.  */
19667 if (can_create_pseudo_p ()
19668 && SSE_REG_MODE_P (mode)
19669 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
19670 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
19674 /* ix86_expand_vector_move_misalign() does not like both
19675 arguments in memory. */
19676 if (!register_operand (op0, mode)
19677 && !register_operand (op1, mode))
19678 op1 = force_reg (mode, op1);
19680 tmp[0] = op0; tmp[1] = op1;
19681 ix86_expand_vector_move_misalign (mode, tmp);
19685 /* Make operand1 a register if it isn't already. */
19686 if (can_create_pseudo_p ()
19687 && !register_operand (op0, mode)
19688 && !register_operand (op1, mode))
19690 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
19694 emit_insn (gen_rtx_SET (op0, op1));
19697 /* Split 32-byte AVX unaligned load and store if needed. */
19700 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
19703 rtx (*extract) (rtx, rtx, rtx);
19706 if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
19707 || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE))
19709 emit_insn (gen_rtx_SET (op0, op1));
19713 rtx orig_op0 = NULL_RTX;
19714 mode = GET_MODE (op0);
19715 switch (GET_MODE_CLASS (mode))
19717 case MODE_VECTOR_INT:
19719 if (mode != V32QImode)
19724 op0 = gen_reg_rtx (V32QImode);
19727 op0 = gen_lowpart (V32QImode, op0);
19728 op1 = gen_lowpart (V32QImode, op1);
19732 case MODE_VECTOR_FLOAT:
19735 gcc_unreachable ();
19741 gcc_unreachable ();
19743 extract = gen_avx_vextractf128v32qi;
19747 extract = gen_avx_vextractf128v8sf;
19751 extract = gen_avx_vextractf128v4df;
19758 rtx r = gen_reg_rtx (mode);
19759 m = adjust_address (op1, mode, 0);
19760 emit_move_insn (r, m);
19761 m = adjust_address (op1, mode, 16);
19762 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
19763 emit_move_insn (op0, r);
19765 else if (MEM_P (op0))
19767 m = adjust_address (op0, mode, 0);
19768 emit_insn (extract (m, op1, const0_rtx));
19769 m = adjust_address (op0, mode, 16);
19770 emit_insn (extract (m, copy_rtx (op1), const1_rtx));
19773 gcc_unreachable ();
19776 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
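/* Roughly, the split 32-byte V8SFmode load above ends up as

     vmovups     (mem), %xmm0
     vinsertf128 $1, 16(mem), %ymm0, %ymm0

   and the split store as two vextractf128 halves written to mem+0
   and mem+16.  */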
19779 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
19780 straight to ix86_expand_vector_move. */
19781 /* Code generation for scalar reg-reg moves of single and double precision data:
19782 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
19786 if (x86_sse_partial_reg_dependency == true)
19791 Code generation for scalar loads of double precision data:
19792 if (x86_sse_split_regs == true)
19793 movlpd mem, reg (gas syntax)
19797 Code generation for unaligned packed loads of single precision data
19798 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
19799 if (x86_sse_unaligned_move_optimal)
19802 if (x86_sse_partial_reg_dependency == true)
19814 Code generation for unaligned packed loads of double precision data
19815 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
19816 if (x86_sse_unaligned_move_optimal)
19819 if (x86_sse_split_regs == true)
19832 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
19839 /* Use unaligned load/store for AVX512 or when optimizing for size. */
19840 if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ())
19842 emit_insn (gen_rtx_SET (op0, op1));
19848 if (GET_MODE_SIZE (mode) == 32)
19849 ix86_avx256_split_vector_move_misalign (op0, op1);
19851 /* Always use 128-bit mov<mode>_internal pattern for AVX. */
19852 emit_insn (gen_rtx_SET (op0, op1));
19856 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19857 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
19859 emit_insn (gen_rtx_SET (op0, op1));
/* ??? If we have typed data, then it would appear that using
   movdqu is the only way to get unaligned data loaded with
   integer type instructions.  */
19866 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19868 emit_insn (gen_rtx_SET (op0, op1));
19874 if (TARGET_SSE2 && mode == V2DFmode)
19878 /* When SSE registers are split into halves, we can avoid
19879 writing to the top half twice. */
19880 if (TARGET_SSE_SPLIT_REGS)
19882 emit_clobber (op0);
19887 /* ??? Not sure about the best option for the Intel chips.
19888 The following would seem to satisfy; the register is
19889 entirely cleared, breaking the dependency chain. We
19890 then store to the upper half, with a dependency depth
19891 of one. A rumor has it that Intel recommends two movsd
19892 followed by an unpacklpd, but this is unconfirmed. And
19893 given that the dependency depth of the unpacklpd would
19894 still be one, I'm not sure why this would be better. */
19895 zero = CONST0_RTX (V2DFmode);
19898 m = adjust_address (op1, DFmode, 0);
19899 emit_insn (gen_sse2_loadlpd (op0, zero, m));
19900 m = adjust_address (op1, DFmode, 8);
19901 emit_insn (gen_sse2_loadhpd (op0, op0, m));
19907 if (mode != V4SFmode)
19908 t = gen_reg_rtx (V4SFmode);
19912 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
19913 emit_move_insn (t, CONST0_RTX (V4SFmode));
19917 m = adjust_address (op1, V2SFmode, 0);
19918 emit_insn (gen_sse_loadlps (t, t, m));
19919 m = adjust_address (op1, V2SFmode, 8);
19920 emit_insn (gen_sse_loadhps (t, t, m));
19921 if (mode != V4SFmode)
19922 emit_move_insn (op0, gen_lowpart (mode, t));
19925 else if (MEM_P (op0))
19927 if (TARGET_SSE2 && mode == V2DFmode)
19929 m = adjust_address (op0, DFmode, 0);
19930 emit_insn (gen_sse2_storelpd (m, op1));
19931 m = adjust_address (op0, DFmode, 8);
19932 emit_insn (gen_sse2_storehpd (m, op1));
19936 if (mode != V4SFmode)
19937 op1 = gen_lowpart (V4SFmode, op1);
19939 m = adjust_address (op0, V2SFmode, 0);
19940 emit_insn (gen_sse_storelps (m, op1));
19941 m = adjust_address (op0, V2SFmode, 8);
19942 emit_insn (gen_sse_storehps (m, copy_rtx (op1)));
19946 gcc_unreachable ();
19949 /* Helper function of ix86_fixup_binary_operands to canonicalize
19950 operand order. Returns true if the operands should be swapped. */
19953 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
19956 rtx dst = operands[0];
19957 rtx src1 = operands[1];
19958 rtx src2 = operands[2];
19960 /* If the operation is not commutative, we can't do anything. */
19961 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH
19962 && GET_RTX_CLASS (code) != RTX_COMM_COMPARE)
19965 /* Highest priority is that src1 should match dst. */
19966 if (rtx_equal_p (dst, src1))
19968 if (rtx_equal_p (dst, src2))
19971 /* Next highest priority is that immediate constants come second. */
19972 if (immediate_operand (src2, mode))
19974 if (immediate_operand (src1, mode))
19977 /* Lowest priority is that memory references should come second. */
19987 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
19988 destination to use for the operation. If different from the true
19989 destination in operands[0], a copy operation will be required. */
19992 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
19995 rtx dst = operands[0];
19996 rtx src1 = operands[1];
19997 rtx src2 = operands[2];
19999 /* Canonicalize operand order. */
20000 if (ix86_swap_binary_operands_p (code, mode, operands))
20002 /* It is invalid to swap operands of different modes. */
20003 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
20005 std::swap (src1, src2);
20008 /* Both source operands cannot be in memory. */
20009 if (MEM_P (src1) && MEM_P (src2))
20011 /* Optimization: Only read from memory once. */
20012 if (rtx_equal_p (src1, src2))
20014 src2 = force_reg (mode, src2);
20017 else if (rtx_equal_p (dst, src1))
20018 src2 = force_reg (mode, src2);
20020 src1 = force_reg (mode, src1);
20023 /* If the destination is memory, and we do not have matching source
20024 operands, do things in registers. */
20025 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
20026 dst = gen_reg_rtx (mode);
20028 /* Source 1 cannot be a constant. */
20029 if (CONSTANT_P (src1))
20030 src1 = force_reg (mode, src1);
20032 /* Source 1 cannot be a non-matching memory. */
20033 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
20034 src1 = force_reg (mode, src1);
20036 /* Improve address combine. */
20038 && GET_MODE_CLASS (mode) == MODE_INT
20040 src2 = force_reg (mode, src2);
20042 operands[1] = src1;
20043 operands[2] = src2;
20047 /* Similarly, but assume that the destination has already been
20048 set up properly. */
20051 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
20052 machine_mode mode, rtx operands[])
20054 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
20055 gcc_assert (dst == operands[0]);
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 3 separate
   memory references (one output, two inputs) in a single insn.  */
20063 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
20066 rtx src1, src2, dst, op, clob;
20068 dst = ix86_fixup_binary_operands (code, mode, operands);
20069 src1 = operands[1];
20070 src2 = operands[2];
20072 /* Emit the instruction. */
20074 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
20076 if (reload_completed
20078 && !rtx_equal_p (dst, src1))
20080 /* This is going to be an LEA; avoid splitting it later. */
20085 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20086 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20089 /* Fix up the destination if needed. */
20090 if (dst != operands[0])
20091 emit_move_insn (operands[0], dst);
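/* A typical caller is an expander in the machine description; a sketch
   (illustrative, not a specific pattern):

     ix86_expand_binary_operator (PLUS, SImode, operands);
     DONE;                                                              */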
20094 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
20095 the given OPERANDS. */
20098 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
20101 rtx op1 = NULL_RTX, op2 = NULL_RTX;
20102 if (SUBREG_P (operands[1]))
20107 else if (SUBREG_P (operands[2]))
/* Optimize (__m128i) d | (__m128i) e and similar code
   when d and e are float vectors into a float vector logical
   insn.  In C/C++ without using intrinsics there is no other way
   to express a vector logical operation on float vectors than
   to cast them temporarily to integer vectors.  */
20118 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
20119 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
20120 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
20121 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
20122 && SUBREG_BYTE (op1) == 0
20123 && (GET_CODE (op2) == CONST_VECTOR
20124 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
20125 && SUBREG_BYTE (op2) == 0))
20126 && can_create_pseudo_p ())
20129 switch (GET_MODE (SUBREG_REG (op1)))
20137 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
20138 if (GET_CODE (op2) == CONST_VECTOR)
20140 op2 = gen_lowpart (GET_MODE (dst), op2);
20141 op2 = force_reg (GET_MODE (dst), op2);
20146 op2 = SUBREG_REG (operands[2]);
20147 if (!vector_operand (op2, GET_MODE (dst)))
20148 op2 = force_reg (GET_MODE (dst), op2);
20150 op1 = SUBREG_REG (op1);
20151 if (!vector_operand (op1, GET_MODE (dst)))
20152 op1 = force_reg (GET_MODE (dst), op1);
20153 emit_insn (gen_rtx_SET (dst,
20154 gen_rtx_fmt_ee (code, GET_MODE (dst),
20156 emit_move_insn (operands[0], gen_lowpart (mode, dst));
20162 if (!vector_operand (operands[1], mode))
20163 operands[1] = force_reg (mode, operands[1]);
20164 if (!vector_operand (operands[2], mode))
20165 operands[2] = force_reg (mode, operands[2]);
20166 ix86_fixup_binary_operands_no_copy (code, mode, operands);
20167 emit_insn (gen_rtx_SET (operands[0],
20168 gen_rtx_fmt_ee (code, mode, operands[1],
20172 /* Return TRUE or FALSE depending on whether the binary operator meets the
20173 appropriate constraints. */
20176 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
20179 rtx dst = operands[0];
20180 rtx src1 = operands[1];
20181 rtx src2 = operands[2];
20183 /* Both source operands cannot be in memory. */
20184 if (MEM_P (src1) && MEM_P (src2))
20187 /* Canonicalize operand order for commutative operators. */
20188 if (ix86_swap_binary_operands_p (code, mode, operands))
20189 std::swap (src1, src2);
20191 /* If the destination is memory, we must have a matching source operand. */
20192 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
20195 /* Source 1 cannot be a constant. */
20196 if (CONSTANT_P (src1))
20199 /* Source 1 cannot be a non-matching memory. */
20200 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
20201 /* Support "andhi/andsi/anddi" as a zero-extending move. */
20202 return (code == AND
20205 || (TARGET_64BIT && mode == DImode))
20206 && satisfies_constraint_L (src2));
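/* For example, "dst = mem & 0xffff" with dst not matching mem is allowed
   by the clause above even though src1 is a non-matching memory: it can
   be emitted as a zero-extending load (e.g. movzwl) instead of an and.
   (Illustrative; constraint L covers the 0xff/0xffff/0xffffffff masks.)  */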
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 2 separate
   memory references (one output, one input) in a single insn.  */
20216 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
20219 bool matching_memory = false;
20220 rtx src, dst, op, clob;
20225 /* If the destination is memory, and we do not have matching source
20226 operands, do things in registers. */
20229 if (rtx_equal_p (dst, src))
20230 matching_memory = true;
20232 dst = gen_reg_rtx (mode);
20235 /* When source operand is memory, destination must match. */
20236 if (MEM_P (src) && !matching_memory)
20237 src = force_reg (mode, src);
20239 /* Emit the instruction. */
20241 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
20247 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20248 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20251 /* Fix up the destination if needed. */
20252 if (dst != operands[0])
20253 emit_move_insn (operands[0], dst);
/* Split 32-bit/64-bit divmod with an 8-bit unsigned divmod if both dividend
   and divisor are within the range [0-255].  */
20260 ix86_split_idivmod (machine_mode mode, rtx operands[],
20263 rtx_code_label *end_label, *qimode_label;
20266 rtx scratch, tmp0, tmp1, tmp2;
20267 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
20268 rtx (*gen_zero_extend) (rtx, rtx);
20269 rtx (*gen_test_ccno_1) (rtx, rtx);
20274 if (GET_MODE (operands[0]) == SImode)
20276 if (GET_MODE (operands[1]) == SImode)
20277 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
20280 = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2;
20281 gen_zero_extend = gen_zero_extendqisi2;
20286 = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1;
20287 gen_zero_extend = gen_zero_extendqidi2;
20289 gen_test_ccno_1 = gen_testsi_ccno_1;
20292 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
20293 gen_test_ccno_1 = gen_testdi_ccno_1;
20294 gen_zero_extend = gen_zero_extendqidi2;
20297 gcc_unreachable ();
20300 end_label = gen_label_rtx ();
20301 qimode_label = gen_label_rtx ();
20303 scratch = gen_reg_rtx (mode);
/* Use 8-bit unsigned divmod if dividend and divisor are within
   the range [0-255].  */
20307 emit_move_insn (scratch, operands[2]);
20308 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
20309 scratch, 1, OPTAB_DIRECT);
20310 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
20311 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
20312 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
20313 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
20314 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
20316 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
20317 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20318 JUMP_LABEL (insn) = qimode_label;
/* Generate the original signed/unsigned divmod.  */
20321 div = gen_divmod4_1 (operands[0], operands[1],
20322 operands[2], operands[3]);
20325 /* Branch to the end. */
20326 emit_jump_insn (gen_jump (end_label));
20329 /* Generate 8bit unsigned divide. */
20330 emit_label (qimode_label);
20331 /* Don't use operands[0] for result of 8bit divide since not all
20332 registers support QImode ZERO_EXTRACT. */
20333 tmp0 = lowpart_subreg (HImode, scratch, mode);
20334 tmp1 = lowpart_subreg (HImode, operands[2], mode);
20335 tmp2 = lowpart_subreg (QImode, operands[3], mode);
20336 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
20340 div = gen_rtx_DIV (mode, operands[2], operands[3]);
20341 mod = gen_rtx_MOD (mode, operands[2], operands[3]);
20345 div = gen_rtx_UDIV (mode, operands[2], operands[3]);
20346 mod = gen_rtx_UMOD (mode, operands[2], operands[3]);
20348 if (mode == SImode)
20350 if (GET_MODE (operands[0]) != SImode)
20351 div = gen_rtx_ZERO_EXTEND (DImode, div);
20352 if (GET_MODE (operands[1]) != SImode)
20353 mod = gen_rtx_ZERO_EXTEND (DImode, mod);
20356 /* Extract remainder from AH. */
20357 tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]),
20358 tmp0, GEN_INT (8), GEN_INT (8));
20359 if (REG_P (operands[1]))
20360 insn = emit_move_insn (operands[1], tmp1);
/* Need a new scratch register since the old one has the result
   of the 8-bit divide.  */
20365 scratch = gen_reg_rtx (GET_MODE (operands[1]));
20366 emit_move_insn (scratch, tmp1);
20367 insn = emit_move_insn (operands[1], scratch);
20369 set_unique_reg_note (insn, REG_EQUAL, mod);
20371 /* Zero extend quotient from AL. */
20372 tmp1 = gen_lowpart (QImode, tmp0);
20373 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
20374 set_unique_reg_note (insn, REG_EQUAL, div);
20376 emit_label (end_label);
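/* Schematically, the emitted sequence is (gas syntax; labels and
   register choice are illustrative):

     mov     dividend, scratch
     or      divisor, scratch
     test    $-0x100, scratch        # do both operands fit in 8 bits?
     je      .Lqimode
     div/idiv ...                    # full-width signed/unsigned divmod
     jmp     .Lend
   .Lqimode:
     divb    ...                     # 8-bit divide: AL = quot, AH = rem
   .Lend:                                                              */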
20379 #define LEA_MAX_STALL (3)
20380 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
20382 /* Increase given DISTANCE in half-cycles according to
20383 dependencies between PREV and NEXT instructions.
20384 Add 1 half-cycle if there is no dependency and
go to the next cycle if there is some dependency.  */
20387 static unsigned int
20388 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
20392 if (!prev || !next)
20393 return distance + (distance & 1) + 2;
20395 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
20396 return distance + 1;
20398 FOR_EACH_INSN_USE (use, next)
20399 FOR_EACH_INSN_DEF (def, prev)
20400 if (!DF_REF_IS_ARTIFICIAL (def)
20401 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
20402 return distance + (distance & 1) + 2;
20404 return distance + 1;
20407 /* Function checks if instruction INSN defines register number
20408 REGNO1 or REGNO2. */
20411 insn_defines_reg (unsigned int regno1, unsigned int regno2,
20416 FOR_EACH_INSN_DEF (def, insn)
20417 if (DF_REF_REG_DEF_P (def)
20418 && !DF_REF_IS_ARTIFICIAL (def)
20419 && (regno1 == DF_REF_REGNO (def)
20420 || regno2 == DF_REF_REGNO (def)))
20426 /* Function checks if instruction INSN uses register number
20427 REGNO as a part of address expression. */
20430 insn_uses_reg_mem (unsigned int regno, rtx insn)
20434 FOR_EACH_INSN_USE (use, insn)
20435 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
20441 /* Search backward for non-agu definition of register number REGNO1
20442 or register number REGNO2 in basic block starting from instruction
20443 START up to head of basic block or instruction INSN.
The function stores true into *FOUND if a definition was found
and false otherwise.
20448 Distance in half-cycles between START and found instruction or head
20449 of BB is added to DISTANCE and returned. */
20452 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
20453 rtx_insn *insn, int distance,
20454 rtx_insn *start, bool *found)
20456 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
20457 rtx_insn *prev = start;
20458 rtx_insn *next = NULL;
20464 && distance < LEA_SEARCH_THRESHOLD)
20466 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
20468 distance = increase_distance (prev, next, distance);
20469 if (insn_defines_reg (regno1, regno2, prev))
20471 if (recog_memoized (prev) < 0
20472 || get_attr_type (prev) != TYPE_LEA)
20481 if (prev == BB_HEAD (bb))
20484 prev = PREV_INSN (prev);
20490 /* Search backward for non-agu definition of register number REGNO1
20491 or register number REGNO2 in INSN's basic block until
20492 1. Pass LEA_SEARCH_THRESHOLD instructions, or
20493 2. Reach neighbor BBs boundary, or
20494 3. Reach agu definition.
20495 Returns the distance between the non-agu definition point and INSN.
20496 If no definition point, returns -1. */
20499 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
20502 basic_block bb = BLOCK_FOR_INSN (insn);
20504 bool found = false;
20506 if (insn != BB_HEAD (bb))
20507 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
20508 distance, PREV_INSN (insn),
20511 if (!found && distance < LEA_SEARCH_THRESHOLD)
20515 bool simple_loop = false;
20517 FOR_EACH_EDGE (e, ei, bb->preds)
20520 simple_loop = true;
20525 distance = distance_non_agu_define_in_bb (regno1, regno2,
20527 BB_END (bb), &found);
20530 int shortest_dist = -1;
20531 bool found_in_bb = false;
20533 FOR_EACH_EDGE (e, ei, bb->preds)
20536 = distance_non_agu_define_in_bb (regno1, regno2,
20542 if (shortest_dist < 0)
20543 shortest_dist = bb_dist;
20544 else if (bb_dist > 0)
20545 shortest_dist = MIN (bb_dist, shortest_dist);
20551 distance = shortest_dist;
20555 /* get_attr_type may modify recog data. We want to make sure
20556 that recog data is valid for instruction INSN, on which
20557 distance_non_agu_define is called. INSN is unchanged here. */
20558 extract_insn_cached (insn);
20563 return distance >> 1;
20566 /* Return the distance in half-cycles between INSN and the next
20567 insn that uses register number REGNO in memory address added
to DISTANCE.  Return -1 if REGNO is set.
Put true value into *FOUND if register usage was found and false otherwise.
20572 Put true value into *REDEFINED if register redefinition was
20573 found and false otherwise. */
20576 distance_agu_use_in_bb (unsigned int regno,
20577 rtx_insn *insn, int distance, rtx_insn *start,
20578 bool *found, bool *redefined)
20580 basic_block bb = NULL;
20581 rtx_insn *next = start;
20582 rtx_insn *prev = NULL;
20585 *redefined = false;
20587 if (start != NULL_RTX)
20589 bb = BLOCK_FOR_INSN (start);
20590 if (start != BB_HEAD (bb))
20591 /* If insn and start belong to the same bb, set prev to insn,
20592 so the call to increase_distance will increase the distance
20593 between insns by 1. */
20599 && distance < LEA_SEARCH_THRESHOLD)
20601 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
20603 distance = increase_distance(prev, next, distance);
20604 if (insn_uses_reg_mem (regno, next))
20606 /* Return DISTANCE if OP0 is used in memory
20607 address in NEXT. */
20612 if (insn_defines_reg (regno, INVALID_REGNUM, next))
20614 /* Return -1 if OP0 is set in NEXT. */
20622 if (next == BB_END (bb))
20625 next = NEXT_INSN (next);
20631 /* Return the distance between INSN and the next insn that uses
register number REGNO0 in a memory address.  Return -1 if no such
use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
20636 distance_agu_use (unsigned int regno0, rtx_insn *insn)
20638 basic_block bb = BLOCK_FOR_INSN (insn);
20640 bool found = false;
20641 bool redefined = false;
20643 if (insn != BB_END (bb))
20644 distance = distance_agu_use_in_bb (regno0, insn, distance,
20646 &found, &redefined);
20648 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
20652 bool simple_loop = false;
20654 FOR_EACH_EDGE (e, ei, bb->succs)
20657 simple_loop = true;
20662 distance = distance_agu_use_in_bb (regno0, insn,
20663 distance, BB_HEAD (bb),
20664 &found, &redefined);
20667 int shortest_dist = -1;
20668 bool found_in_bb = false;
20669 bool redefined_in_bb = false;
20671 FOR_EACH_EDGE (e, ei, bb->succs)
20674 = distance_agu_use_in_bb (regno0, insn,
20675 distance, BB_HEAD (e->dest),
20676 &found_in_bb, &redefined_in_bb);
20679 if (shortest_dist < 0)
20680 shortest_dist = bb_dist;
20681 else if (bb_dist > 0)
20682 shortest_dist = MIN (bb_dist, shortest_dist);
20688 distance = shortest_dist;
20692 if (!found || redefined)
20695 return distance >> 1;
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a choice between LEA and ADD.
   Negative value: ADD is preferred over LEA
   Zero: neutral
   Positive value: LEA is preferred over ADD  */
20703 #define IX86_LEA_PRIORITY 0
20705 /* Return true if usage of lea INSN has performance advantage
20706 over a sequence of instructions. Instructions sequence has
20707 SPLIT_COST cycles higher latency than lea latency. */
20710 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
20711 unsigned int regno2, int split_cost, bool has_scale)
20713 int dist_define, dist_use;
/* For Silvermont, if a 2-source or 3-source LEA is used for a
   non-destructive destination, or for the ability to use
   SCALE, the use of LEA is justified.  */
20718 if (TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
20719 || TARGET_TREMONT || TARGET_INTEL)
20723 if (split_cost < 1)
20725 if (regno0 == regno1 || regno0 == regno2)
20730 dist_define = distance_non_agu_define (regno1, regno2, insn);
20731 dist_use = distance_agu_use (regno0, insn);
20733 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
/* If there is no non-AGU operand definition, no AGU
   operand usage and split cost is 0, then both the lea
   and non-lea variants have the same priority.  Currently
   we prefer lea for 64-bit code and non-lea on 32-bit code.  */
20740 if (dist_use < 0 && split_cost == 0)
20741 return TARGET_64BIT || IX86_LEA_PRIORITY;
/* With a longer definition distance, lea is more preferable.
   Here we change it to take into account splitting cost and
   lea priority.  */
20749 dist_define += split_cost + IX86_LEA_PRIORITY;
/* If there is no use in a memory address then we just check
20752 that split cost exceeds AGU stall. */
20754 return dist_define > LEA_MAX_STALL;
20756 /* If this insn has both backward non-agu dependence and forward
20757 agu dependence, the one with short distance takes effect. */
20758 return dist_define >= dist_use;
20761 /* Return true if it is legal to clobber flags by INSN and
20762 false otherwise. */
20765 ix86_ok_to_clobber_flags (rtx_insn *insn)
20767 basic_block bb = BLOCK_FOR_INSN (insn);
20773 if (NONDEBUG_INSN_P (insn))
20775 FOR_EACH_INSN_USE (use, insn)
20776 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
20779 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
20783 if (insn == BB_END (bb))
20786 insn = NEXT_INSN (insn);
20789 live = df_get_live_out(bb);
20790 return !REGNO_REG_SET_P (live, FLAGS_REG);
20793 /* Return true if we need to split op0 = op1 + op2 into a sequence of
20794 move and add to avoid AGU stalls. */
20797 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
20799 unsigned int regno0, regno1, regno2;
20801 /* Check if we need to optimize. */
20802 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20805 /* Check it is correct to split here. */
20806 if (!ix86_ok_to_clobber_flags(insn))
20809 regno0 = true_regnum (operands[0]);
20810 regno1 = true_regnum (operands[1]);
20811 regno2 = true_regnum (operands[2]);
20813 /* We need to split only adds with non destructive
20814 destination operand. */
20815 if (regno0 == regno1 || regno0 == regno2)
20818 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
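/* When the split is performed, the caller turns e.g.
   "leal (%edi,%esi), %eax" into "movl %edi, %eax; addl %esi, %eax"
   (an illustrative sequence; register choice is the allocator's).  */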
20821 /* Return true if we should emit lea instruction instead of mov
20825 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
20827 unsigned int regno0, regno1;
20829 /* Check if we need to optimize. */
20830 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20833 /* Use lea for reg to reg moves only. */
20834 if (!REG_P (operands[0]) || !REG_P (operands[1]))
20837 regno0 = true_regnum (operands[0]);
20838 regno1 = true_regnum (operands[1]);
20840 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
20843 /* Return true if we need to split lea into a sequence of
20844 instructions to avoid AGU stalls. */
20847 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
20849 unsigned int regno0, regno1, regno2;
20851 struct ix86_address parts;
20854 /* Check we need to optimize. */
20855 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
20858 /* The "at least two components" test below might not catch simple
20859 move or zero extension insns if parts.base is non-NULL and parts.disp
20860 is const0_rtx as the only components in the address, e.g. if the
20861 register is %rbp or %r13. As this test is much cheaper and moves or
20862 zero extensions are the common case, do this check first. */
20863 if (REG_P (operands[1])
20864 || (SImode_address_operand (operands[1], VOIDmode)
20865 && REG_P (XEXP (operands[1], 0))))
20868 /* Check if it is OK to split here. */
20869 if (!ix86_ok_to_clobber_flags (insn))
20872 ok = ix86_decompose_address (operands[1], &parts);
20875 /* There should be at least two components in the address. */
20876 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
20877 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
/* We should not split into add if a non-legitimate PIC
   operand is used as displacement.  */
20882 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
regno0 = true_regnum (operands[0]);
20886 regno1 = INVALID_REGNUM;
20887 regno2 = INVALID_REGNUM;
20890 regno1 = true_regnum (parts.base);
20892 regno2 = true_regnum (parts.index);
/* Compute how many cycles we will add to execution time
   if we split the lea into a sequence of instructions.  */
20898 if (parts.base || parts.index)
/* Have to use mov instruction if the non-destructive
   destination form is used.  */
20902 if (regno1 != regno0 && regno2 != regno0)
20905 /* Have to add index to base if both exist. */
20906 if (parts.base && parts.index)
20909 /* Have to use shift and adds if scale is 2 or greater. */
20910 if (parts.scale > 1)
20912 if (regno0 != regno1)
20914 else if (regno2 == regno0)
20917 split_cost += parts.scale;
/* Have to use an add instruction with an immediate if
   disp is nonzero.  */
20922 if (parts.disp && parts.disp != const0_rtx)
20925 /* Subtract the price of lea. */
20929 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
20933 /* Emit x86 binary operand CODE in mode MODE, where the first operand
20934 matches destination. RTX includes clobber of FLAGS_REG. */
20937 ix86_emit_binop (enum rtx_code code, machine_mode mode,
20942 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
20943 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20945 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
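/* The emitted RTL has the two-address shape

     (parallel [(set dst (code:MODE dst src))
                (clobber (reg:CC FLAGS_REG))])

   i.e. the first input always matches the output.  */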
20948 /* Return true if regno1 def is nearest to the insn. */
20951 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
20953 rtx_insn *prev = insn;
20954 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
20958 while (prev && prev != start)
20960 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
20962 prev = PREV_INSN (prev);
20965 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
20967 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
20969 prev = PREV_INSN (prev);
20972 /* None of the regs is defined in the bb. */
20976 /* Split lea instructions into a sequence of instructions
20977 which are executed on ALU to avoid AGU stalls.
20978 It is assumed that it is allowed to clobber flags register
20979 at lea position. */
20982 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
20984 unsigned int regno0, regno1, regno2;
20985 struct ix86_address parts;
20989 ok = ix86_decompose_address (operands[1], &parts);
20992 target = gen_lowpart (mode, operands[0]);
20994 regno0 = true_regnum (target);
20995 regno1 = INVALID_REGNUM;
20996 regno2 = INVALID_REGNUM;
21000 parts.base = gen_lowpart (mode, parts.base);
21001 regno1 = true_regnum (parts.base);
21006 parts.index = gen_lowpart (mode, parts.index);
21007 regno2 = true_regnum (parts.index);
21011 parts.disp = gen_lowpart (mode, parts.disp);
21013 if (parts.scale > 1)
21015 /* Case r1 = r1 + ... */
21016 if (regno1 == regno0)
/* If we have a case r1 = r1 + C * r2 then we
   should use multiplication which is very
   expensive.  Assume the cost model is wrong if we
   have such a case here.  */
21022 gcc_assert (regno2 != regno0);
21024 for (adds = parts.scale; adds > 0; adds--)
21025 ix86_emit_binop (PLUS, mode, target, parts.index);
21029 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
21030 if (regno0 != regno2)
21031 emit_insn (gen_rtx_SET (target, parts.index));
21033 /* Use shift for scaling. */
21034 ix86_emit_binop (ASHIFT, mode, target,
21035 GEN_INT (exact_log2 (parts.scale)));
21038 ix86_emit_binop (PLUS, mode, target, parts.base);
21040 if (parts.disp && parts.disp != const0_rtx)
21041 ix86_emit_binop (PLUS, mode, target, parts.disp);
21044 else if (!parts.base && !parts.index)
21046 gcc_assert(parts.disp);
21047 emit_insn (gen_rtx_SET (target, parts.disp));
21053 if (regno0 != regno2)
21054 emit_insn (gen_rtx_SET (target, parts.index));
21056 else if (!parts.index)
21058 if (regno0 != regno1)
21059 emit_insn (gen_rtx_SET (target, parts.base));
21063 if (regno0 == regno1)
21065 else if (regno0 == regno2)
21071 /* Find better operand for SET instruction, depending
21072 on which definition is farther from the insn. */
21073 if (find_nearest_reg_def (insn, regno1, regno2))
21074 tmp = parts.index, tmp1 = parts.base;
21076 tmp = parts.base, tmp1 = parts.index;
21078 emit_insn (gen_rtx_SET (target, tmp));
21080 if (parts.disp && parts.disp != const0_rtx)
21081 ix86_emit_binop (PLUS, mode, target, parts.disp);
21083 ix86_emit_binop (PLUS, mode, target, tmp1);
21087 ix86_emit_binop (PLUS, mode, target, tmp);
21090 if (parts.disp && parts.disp != const0_rtx)
21091 ix86_emit_binop (PLUS, mode, target, parts.disp);
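/* As an illustration, "leal 4(%ebx,%ecx,4), %eax" could be split by the
   code above into

     movl  %ecx, %eax
     sall  $2, %eax
     addl  %ebx, %eax
     addl  $4, %eax

   (example only; the exact sequence depends on which operands already
   match the destination and on the scale).  */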
/* Return true if it is ok to optimize an ADD operation to a LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For processors like BONNELL, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */
21102 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
21104 unsigned int regno0 = true_regnum (operands[0]);
21105 unsigned int regno1 = true_regnum (operands[1]);
21106 unsigned int regno2 = true_regnum (operands[2]);
21108 /* If a = b + c, (a!=b && a!=c), must use lea form. */
21109 if (regno0 != regno1 && regno0 != regno2)
21112 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
21115 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
21118 /* Return true if destination reg of SET_BODY is shift count of
21122 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
21128 /* Retrieve destination of SET_BODY. */
21129 switch (GET_CODE (set_body))
21132 set_dest = SET_DEST (set_body);
21133 if (!set_dest || !REG_P (set_dest))
21137 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
21138 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
21146 /* Retrieve shift count of USE_BODY. */
21147 switch (GET_CODE (use_body))
21150 shift_rtx = XEXP (use_body, 1);
21153 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
21154 if (ix86_dep_by_shift_count_body (set_body,
21155 XVECEXP (use_body, 0, i)))
21163 && (GET_CODE (shift_rtx) == ASHIFT
21164 || GET_CODE (shift_rtx) == LSHIFTRT
21165 || GET_CODE (shift_rtx) == ASHIFTRT
21166 || GET_CODE (shift_rtx) == ROTATE
21167 || GET_CODE (shift_rtx) == ROTATERT))
21169 rtx shift_count = XEXP (shift_rtx, 1);
21171 /* Return true if shift count is dest of SET_BODY. */
21172 if (REG_P (shift_count))
21174 /* Add check since it can be invoked before register
21175 allocation in pre-reload schedule. */
21176 if (reload_completed
21177 && true_regnum (set_dest) == true_regnum (shift_count))
21179 else if (REGNO(set_dest) == REGNO(shift_count))
21187 /* Return true if destination reg of SET_INSN is shift count of
21191 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
21193 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
21194 PATTERN (use_insn));
21197 /* Return TRUE or FALSE depending on whether the unary operator meets the
21198 appropriate constraints. */
21201 ix86_unary_operator_ok (enum rtx_code,
21205 /* If one of operands is memory, source and destination must match. */
21206 if ((MEM_P (operands[0])
21207 || MEM_P (operands[1]))
21208 && ! rtx_equal_p (operands[0], operands[1]))
21213 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
21214 are ok, keeping in mind the possible movddup alternative. */
21217 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
21219 if (MEM_P (operands[0]))
21220 return rtx_equal_p (operands[0], operands[1 + high]);
21221 if (MEM_P (operands[1]) && MEM_P (operands[2]))
21222 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
21226 /* Post-reload splitter for converting an SF or DFmode value in an
21227 SSE register into an unsigned SImode. */
21230 ix86_split_convert_uns_si_sse (rtx operands[])
21232 machine_mode vecmode;
21233 rtx value, large, zero_or_two31, input, two31, x;
21235 large = operands[1];
21236 zero_or_two31 = operands[2];
21237 input = operands[3];
21238 two31 = operands[4];
21239 vecmode = GET_MODE (large);
21240 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
21242 /* Load up the value into the low element. We must ensure that the other
21243 elements are valid floats -- zero is the easiest such value. */
21246 if (vecmode == V4SFmode)
21247 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
21249 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
21253 input = gen_rtx_REG (vecmode, REGNO (input));
21254 emit_move_insn (value, CONST0_RTX (vecmode));
21255 if (vecmode == V4SFmode)
21256 emit_insn (gen_sse_movss (value, value, input));
21258 emit_insn (gen_sse2_movsd (value, value, input));
21261 emit_move_insn (large, two31);
21262 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
21264 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
21265 emit_insn (gen_rtx_SET (large, x));
21267 x = gen_rtx_AND (vecmode, zero_or_two31, large);
21268 emit_insn (gen_rtx_SET (zero_or_two31, x));
21270 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
21271 emit_insn (gen_rtx_SET (value, x));
21273 large = gen_rtx_REG (V4SImode, REGNO (large));
21274 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
21276 x = gen_rtx_REG (V4SImode, REGNO (value));
21277 if (vecmode == V4SFmode)
21278 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
21280 emit_insn (gen_sse2_cvttpd2dq (x, value));
21283 emit_insn (gen_xorv4si3 (value, value, large));
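/* Net effect: inputs below 0x1.0p31 convert directly with the signed
   cvtt; larger inputs have 0x1.0p31 subtracted first, and the sign bit
   produced by the comparison mask (shifted into bit 31) is xor-ed back
   into the integer result.  */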
21286 /* Convert an unsigned DImode value into a DFmode, using only SSE.
21287 Expects the 64-bit DImode to be supplied in a pair of integral
21288 registers. Requires SSE2; will use SSE3 if available. For x86_32,
21289 -mfpmath=sse, !optimize_size only. */
21292 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
21294 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
21295 rtx int_xmm, fp_xmm;
21296 rtx biases, exponents;
21299 int_xmm = gen_reg_rtx (V4SImode);
21300 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
21301 emit_insn (gen_movdi_to_sse (int_xmm, input));
21302 else if (TARGET_SSE_SPLIT_REGS)
21304 emit_clobber (int_xmm);
21305 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
21309 x = gen_reg_rtx (V2DImode);
21310 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
21311 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
21314 x = gen_rtx_CONST_VECTOR (V4SImode,
21315 gen_rtvec (4, GEN_INT (0x43300000UL),
21316 GEN_INT (0x45300000UL),
21317 const0_rtx, const0_rtx));
21318 exponents = validize_mem (force_const_mem (V4SImode, x));
21320 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
21321 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
21323 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
21324 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
21325 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
21326 (0x1.0p84 + double(fp_value_hi_xmm)).
21327 Note these exponents differ by 32. */
21329 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
21331 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
21332 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
21333 real_ldexp (&bias_lo_rvt, &dconst1, 52);
21334 real_ldexp (&bias_hi_rvt, &dconst1, 84);
21335 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
21336 x = const_double_from_real_value (bias_hi_rvt, DFmode);
21337 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
21338 biases = validize_mem (force_const_mem (V2DFmode, biases));
21339 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
21341 /* Add the upper and lower DFmode values together. */
21343 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
21346 x = copy_to_mode_reg (V2DFmode, fp_xmm);
21347 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
21348 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
21351 ix86_expand_vector_extract (false, target, fp_xmm, 0);
21354 /* Not used, but eases macroization of patterns. */
21356 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
21358 gcc_unreachable ();
21361 /* Convert an unsigned SImode value into a DFmode. Only currently used
21362 for SSE, but applicable anywhere. */
21365 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
21367 REAL_VALUE_TYPE TWO31r;
21370 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
21371 NULL, 1, OPTAB_DIRECT);
21373 fp = gen_reg_rtx (DFmode);
21374 emit_insn (gen_floatsidf2 (fp, x));
21376 real_ldexp (&TWO31r, &dconst1, 31);
21377 x = const_double_from_real_value (TWO31r, DFmode);
21379 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
21381 emit_move_insn (target, x);
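/* Worked example: input 0 wraps to -2147483648 after the PLUS, converts
   to -2147483648.0, and adding the 0x1.0p31 bias yields 0.0; likewise
   input 0xffffffff wraps to 2147483647 and yields 4294967295.0.  */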
21384 /* Convert a signed DImode value into a DFmode. Only used for SSE in
21385 32-bit mode; otherwise we have a direct convert instruction. */
21388 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
21390 REAL_VALUE_TYPE TWO32r;
21391 rtx fp_lo, fp_hi, x;
21393 fp_lo = gen_reg_rtx (DFmode);
21394 fp_hi = gen_reg_rtx (DFmode);
21396 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
21398 real_ldexp (&TWO32r, &dconst1, 32);
21399 x = const_double_from_real_value (TWO32r, DFmode);
21400 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
21402 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
21404 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
21407 emit_move_insn (target, x);
21410 /* Convert an unsigned SImode value into a SFmode, using only SSE.
21411 For x86_32, -mfpmath=sse, !optimize_size only. */
21413 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
21415 REAL_VALUE_TYPE ONE16r;
21416 rtx fp_hi, fp_lo, int_hi, int_lo, x;
21418 real_ldexp (&ONE16r, &dconst1, 16);
21419 x = const_double_from_real_value (ONE16r, SFmode);
21420 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
21421 NULL, 0, OPTAB_DIRECT);
21422 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
21423 NULL, 0, OPTAB_DIRECT);
21424 fp_hi = gen_reg_rtx (SFmode);
21425 fp_lo = gen_reg_rtx (SFmode);
21426 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
21427 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
21428 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
21430 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
21432 if (!rtx_equal_p (target, fp_hi))
21433 emit_move_insn (target, fp_hi);
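/* I.e. target = (float) (input >> 16) * 0x1.0p16 + (float) (input & 0xffff);
   each 16-bit half converts exactly, leaving a single rounding in the
   final addition.  */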
21436 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
21437 a vector of unsigned ints VAL to vector of floats TARGET. */
21440 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
21443 REAL_VALUE_TYPE TWO16r;
21444 machine_mode intmode = GET_MODE (val);
21445 machine_mode fltmode = GET_MODE (target);
21446 rtx (*cvt) (rtx, rtx);
21448 if (intmode == V4SImode)
21449 cvt = gen_floatv4siv4sf2;
21451 cvt = gen_floatv8siv8sf2;
21452 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
21453 tmp[0] = force_reg (intmode, tmp[0]);
21454 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
21456 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
21457 NULL_RTX, 1, OPTAB_DIRECT);
21458 tmp[3] = gen_reg_rtx (fltmode);
21459 emit_insn (cvt (tmp[3], tmp[1]));
21460 tmp[4] = gen_reg_rtx (fltmode);
21461 emit_insn (cvt (tmp[4], tmp[2]));
21462 real_ldexp (&TWO16r, &dconst1, 16);
21463 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
21464 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
21465 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
21467 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
21469 if (tmp[7] != target)
21470 emit_move_insn (target, tmp[7]);
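/* Each lane thus computes
   (float) (val >> 16) * 0x1.0p16 + (float) (val & 0xffff); both halves
   fit in 16 bits, so the signed conversions are exact and only the
   final addition rounds.  */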
21473 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
21474 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
21475 This is done by doing just signed conversion if < 0x1p31, and otherwise by
21476 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
21479 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
21481 REAL_VALUE_TYPE TWO31r;
21482 rtx two31r, tmp[4];
21483 machine_mode mode = GET_MODE (val);
21484 machine_mode scalarmode = GET_MODE_INNER (mode);
21485 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
21486 rtx (*cmp) (rtx, rtx, rtx, rtx);
21489 for (i = 0; i < 3; i++)
21490 tmp[i] = gen_reg_rtx (mode);
21491 real_ldexp (&TWO31r, &dconst1, 31);
21492 two31r = const_double_from_real_value (TWO31r, scalarmode);
21493 two31r = ix86_build_const_vector (mode, 1, two31r);
21494 two31r = force_reg (mode, two31r);
21497 case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
21498 case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
21499 case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
21500 case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
21501 default: gcc_unreachable ();
21503 tmp[3] = gen_rtx_LE (mode, two31r, val);
21504 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
21505 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
21507 if (intmode == V4SImode || TARGET_AVX2)
21508 *xorp = expand_simple_binop (intmode, ASHIFT,
21509 gen_lowpart (intmode, tmp[0]),
21510 GEN_INT (31), NULL_RTX, 0,
21514 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
21515 two31 = ix86_build_const_vector (intmode, 1, two31);
21516 *xorp = expand_simple_binop (intmode, AND,
21517 gen_lowpart (intmode, tmp[0]),
21518 two31, NULL_RTX, 0,
21521 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
21525 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
21526 then replicate the value for all elements of the vector
21530 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
21534 machine_mode scalar_mode;
21558 n_elt = GET_MODE_NUNITS (mode);
21559 v = rtvec_alloc (n_elt);
21560 scalar_mode = GET_MODE_INNER (mode);
21562 RTVEC_ELT (v, 0) = value;
21564 for (i = 1; i < n_elt; ++i)
21565 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
21567 return gen_rtx_CONST_VECTOR (mode, v);
21570 gcc_unreachable ();
21574 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
21575 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
21576 for an SSE register. If VECT is true, then replicate the mask for
21577 all elements of the vector register. If INVERT is true, then create
21578 a mask excluding the sign bit. */
21581 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
21583 machine_mode vec_mode, imode;
21611 vec_mode = VOIDmode;
21616 gcc_unreachable ();
21619 machine_mode inner_mode = GET_MODE_INNER (mode);
21620 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
21621 GET_MODE_BITSIZE (inner_mode));
21623 w = wi::bit_not (w);
21625 /* Force this value into the low part of a fp vector constant. */
21626 mask = immed_wide_int_const (w, imode);
21627 mask = gen_lowpart (inner_mode, mask);
21629 if (vec_mode == VOIDmode)
21630 return force_reg (inner_mode, mask);
21632 v = ix86_build_const_vector (vec_mode, vect, mask);
21633 return force_reg (vec_mode, v);
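/* For example, for V4SFmode with VECT true this produces
   { -0.0, -0.0, -0.0, -0.0 } (bit pattern 0x80000000 in each element),
   or 0x7fffffff in each element when INVERT is true.  */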
21636 /* Generate code for floating point ABS or NEG. */
21639 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
21642 rtx mask, set, dst, src;
21643 bool use_sse = false;
21644 bool vector_mode = VECTOR_MODE_P (mode);
21645 machine_mode vmode = mode;
21649 else if (mode == TFmode)
21651 else if (TARGET_SSE_MATH)
21653 use_sse = SSE_FLOAT_MODE_P (mode);
21654 if (mode == SFmode)
21656 else if (mode == DFmode)
21660 /* NEG and ABS performed with SSE use bitwise mask operations.
21661 Create the appropriate mask now. */
21663 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
21670 set = gen_rtx_fmt_e (code, mode, src);
21671 set = gen_rtx_SET (dst, set);
21678 use = gen_rtx_USE (VOIDmode, mask);
21680 par = gen_rtvec (2, set, use);
21683 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
21684 par = gen_rtvec (3, set, use, clob);
21686 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
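/* With SSE this reduces to a bitwise mask operation, e.g. for SFmode
   (illustrative, gas syntax; the mask operand names are placeholders):

     xorps  SIGNBIT_MASK, %xmm0        # NEG
     andps  INV_SIGNBIT_MASK, %xmm0    # ABS                           */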
21692 /* Expand a copysign operation. Special case operand 0 being a constant. */
21695 ix86_expand_copysign (rtx operands[])
21697 machine_mode mode, vmode;
21698 rtx dest, op0, op1, mask, nmask;
21700 dest = operands[0];
21704 mode = GET_MODE (dest);
21706 if (mode == SFmode)
21708 else if (mode == DFmode)
21713 if (CONST_DOUBLE_P (op0))
21715 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
21717 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
21718 op0 = simplify_unary_operation (ABS, mode, op0, mode);
21720 if (mode == SFmode || mode == DFmode)
21722 if (op0 == CONST0_RTX (mode))
21723 op0 = CONST0_RTX (vmode);
21726 rtx v = ix86_build_const_vector (vmode, false, op0);
21728 op0 = force_reg (vmode, v);
21731 else if (op0 != CONST0_RTX (mode))
21732 op0 = force_reg (mode, op0);
21734 mask = ix86_build_signbit_mask (vmode, 0, 0);
21736 if (mode == SFmode)
21737 copysign_insn = gen_copysignsf3_const;
21738 else if (mode == DFmode)
21739 copysign_insn = gen_copysigndf3_const;
21741 copysign_insn = gen_copysigntf3_const;
21743 emit_insn (copysign_insn (dest, op0, op1, mask));
21747 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
21749 nmask = ix86_build_signbit_mask (vmode, 0, 1);
21750 mask = ix86_build_signbit_mask (vmode, 0, 0);
21752 if (mode == SFmode)
21753 copysign_insn = gen_copysignsf3_var;
21754 else if (mode == DFmode)
21755 copysign_insn = gen_copysigndf3_var;
21757 copysign_insn = gen_copysigntf3_var;
21759 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
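/* Bitwise, copysign (op0, op1) is computed as
   (op0 & ~signbit) | (op1 & signbit); in the constant case the first
   AND is folded away because |op0| is already known.  */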
21763 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
21764 be a constant, and so has already been expanded into a vector constant. */
21767 ix86_split_copysign_const (rtx operands[])
21769 machine_mode mode, vmode;
21770 rtx dest, op0, mask, x;
21772 dest = operands[0];
21774 mask = operands[3];
21776 mode = GET_MODE (dest);
21777 vmode = GET_MODE (mask);
21779 dest = lowpart_subreg (vmode, dest, mode);
21780 x = gen_rtx_AND (vmode, dest, mask);
21781 emit_insn (gen_rtx_SET (dest, x));
21783 if (op0 != CONST0_RTX (vmode))
21785 x = gen_rtx_IOR (vmode, dest, op0);
21786 emit_insn (gen_rtx_SET (dest, x));
21790 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
21791 so we have to do two masks. */
21794 ix86_split_copysign_var (rtx operands[])
21796 machine_mode mode, vmode;
21797 rtx dest, scratch, op0, op1, mask, nmask, x;
21799 dest = operands[0];
21800 scratch = operands[1];
21803 nmask = operands[4];
21804 mask = operands[5];
21806 mode = GET_MODE (dest);
21807 vmode = GET_MODE (mask);
21809 if (rtx_equal_p (op0, op1))
21811 /* Shouldn't happen often (it's useless, obviously), but when it does
21812 we'd generate incorrect code if we continue below. */
21813 emit_move_insn (dest, op0);
21817 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
21819 gcc_assert (REGNO (op1) == REGNO (scratch));
21821 x = gen_rtx_AND (vmode, scratch, mask);
21822 emit_insn (gen_rtx_SET (scratch, x));
21825 op0 = lowpart_subreg (vmode, op0, mode);
21826 x = gen_rtx_NOT (vmode, dest);
21827 x = gen_rtx_AND (vmode, x, op0);
21828 emit_insn (gen_rtx_SET (dest, x));
21832 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
21834 x = gen_rtx_AND (vmode, scratch, mask);
21836 else /* alternative 2,4 */
21838 gcc_assert (REGNO (mask) == REGNO (scratch));
21839 op1 = lowpart_subreg (vmode, op1, mode);
21840 x = gen_rtx_AND (vmode, scratch, op1);
21842 emit_insn (gen_rtx_SET (scratch, x));
21844 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
21846 dest = lowpart_subreg (vmode, op0, mode);
21847 x = gen_rtx_AND (vmode, dest, nmask);
21849 else /* alternative 3,4 */
21851 gcc_assert (REGNO (nmask) == REGNO (dest));
21853 op0 = lowpart_subreg (vmode, op0, mode);
21854 x = gen_rtx_AND (vmode, dest, op0);
21856 emit_insn (gen_rtx_SET (dest, x));
21859 x = gen_rtx_IOR (vmode, dest, scratch);
21860 emit_insn (gen_rtx_SET (dest, x));
21863 /* Expand an xorsign operation. */
21866 ix86_expand_xorsign (rtx operands[])
21868 rtx (*xorsign_insn)(rtx, rtx, rtx, rtx);
21869 machine_mode mode, vmode;
21870 rtx dest, op0, op1, mask;
21872 dest = operands[0];
21876 mode = GET_MODE (dest);
21878 if (mode == SFmode)
21880 xorsign_insn = gen_xorsignsf3_1;
21883 else if (mode == DFmode)
21885 xorsign_insn = gen_xorsigndf3_1;
21889 gcc_unreachable ();
21891 mask = ix86_build_signbit_mask (vmode, 0, 0);
21893 emit_insn (xorsign_insn (dest, op0, op1, mask));
21896 /* Deconstruct an xorsign operation into bit masks. */
21899 ix86_split_xorsign (rtx operands[])
21901 machine_mode mode, vmode;
21902 rtx dest, op0, mask, x;
21904 dest = operands[0];
21906 mask = operands[3];
21908 mode = GET_MODE (dest);
21909 vmode = GET_MODE (mask);
21911 dest = lowpart_subreg (vmode, dest, mode);
21912 x = gen_rtx_AND (vmode, dest, mask);
21913 emit_insn (gen_rtx_SET (dest, x));
21915 op0 = lowpart_subreg (vmode, op0, mode);
21916 x = gen_rtx_XOR (vmode, dest, op0);
21917 emit_insn (gen_rtx_SET (dest, x));
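/* Net effect: dest = op0 ^ (op1 & signbit), i.e. the sign of op0 is
   flipped exactly when op1 is negative (the insn constraints tie DEST
   to op1, so the AND above extracts op1's sign).  */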
21920 /* Return TRUE or FALSE depending on whether the first SET in INSN
21921 has source and destination with matching CC modes, and that the
21922 CC mode is at least as constrained as REQ_MODE. */
21925 ix86_match_ccmode (rtx insn, machine_mode req_mode)
21928 machine_mode set_mode;
21930 set = PATTERN (insn);
21931 if (GET_CODE (set) == PARALLEL)
21932 set = XVECEXP (set, 0, 0);
21933 gcc_assert (GET_CODE (set) == SET);
21934 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
21936 set_mode = GET_MODE (SET_DEST (set));
21940 if (req_mode != CCNOmode
21941 && (req_mode != CCmode
21942 || XEXP (SET_SRC (set), 1) != const0_rtx))
21946 if (req_mode == CCGCmode)
21950 if (req_mode == CCGOCmode || req_mode == CCNOmode)
21954 if (req_mode == CCZmode)
21967 if (set_mode != req_mode)
21972 gcc_unreachable ();
21975 return GET_MODE (SET_SRC (set)) == set_mode;
21978 /* Generate insn patterns to do an integer compare of OPERANDS. */
21981 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
21983 machine_mode cmpmode;
21986 cmpmode = SELECT_CC_MODE (code, op0, op1);
21987 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
21989 /* This is very simple, but making the interface the same as in the
21990 FP case makes the rest of the code easier. */
21991 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
21992 emit_insn (gen_rtx_SET (flags, tmp));
21994 /* Return the test that should be put into the flags user, i.e.
21995 the bcc, scc, or cmov instruction. */
21996 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
21999 /* Figure out whether to use unordered fp comparisons. */
22002 ix86_unordered_fp_compare (enum rtx_code code)
22004 if (!TARGET_IEEE_FP)
22029 gcc_unreachable ();
22034 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
22036 machine_mode mode = GET_MODE (op0);
22038 if (SCALAR_FLOAT_MODE_P (mode))
22040 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
22046 /* Only zero flag is needed. */
22047 case EQ: /* ZF=0 */
22048 case NE: /* ZF!=0 */
22050 /* Codes needing carry flag. */
22051 case GEU: /* CF=0 */
22052 case LTU: /* CF=1 */
22053 /* Detect overflow checks. They need just the carry flag. */
22054 if (GET_CODE (op0) == PLUS
22055 && (rtx_equal_p (op1, XEXP (op0, 0))
22056 || rtx_equal_p (op1, XEXP (op0, 1))))
22060 case GTU: /* CF=0 & ZF=0 */
22061 case LEU: /* CF=1 | ZF=1 */
22063 /* Codes possibly doable only with sign flag when
22064 comparing against zero. */
22065 case GE: /* SF=OF or SF=0 */
22066 case LT: /* SF<>OF or SF=1 */
22067 if (op1 == const0_rtx)
22070 /* For other cases Carry flag is not required. */
/* Codes doable only with the sign flag when comparing
   against zero, but for which we miss the jump instruction,
   so we need to use relational tests against overflow,
   which thus needs to be zero.  */
22076 case GT: /* ZF=0 & SF=OF */
22077 case LE: /* ZF=1 | SF<>OF */
22078 if (op1 == const0_rtx)
/* The strcmp pattern does (use flags) and combine may ask us
   for a proper mode.  */
22087 gcc_unreachable ();
22091 /* Return the fixed registers used for condition codes. */
22094 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22097 *p2 = INVALID_REGNUM;
22101 /* If two condition code modes are compatible, return a condition code
22102 mode which is compatible with both. Otherwise, return
22105 static machine_mode
22106 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
22111 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
22114 if ((m1 == CCGCmode && m2 == CCGOCmode)
22115 || (m1 == CCGOCmode && m2 == CCGCmode))
22118 if ((m1 == CCNOmode && m2 == CCGOCmode)
22119 || (m1 == CCGOCmode && m2 == CCNOmode))
22123 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
22125 else if (m2 == CCZmode
22126 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
22132 gcc_unreachable ();
/* These are only compatible with themselves, which we already
   checked above.  */
22170 /* Return a comparison we can do and that it is equivalent to
22171 swap_condition (code) apart possibly from orderedness.
22172 But, never change orderedness if TARGET_IEEE_FP, returning
22173 UNKNOWN in that case if necessary. */
22175 static enum rtx_code
22176 ix86_fp_swap_condition (enum rtx_code code)
22180 case GT: /* GTU - CF=0 & ZF=0 */
22181 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
22182 case GE: /* GEU - CF=0 */
22183 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
22184 case UNLT: /* LTU - CF=1 */
22185 return TARGET_IEEE_FP ? UNKNOWN : GT;
22186 case UNLE: /* LEU - CF=1 | ZF=1 */
22187 return TARGET_IEEE_FP ? UNKNOWN : GE;
22189 return swap_condition (code);
/* Return the cost of comparison CODE using the best strategy for
   performance.  All following functions use the number of instructions as
   a cost metric.  In the future this should be tweaked to compute bytes for
   optimize_size and take into account performance of various instructions
   on various CPUs.  */
22199 ix86_fp_comparison_cost (enum rtx_code code)
22203 /* The cost of code using bit-twiddling on %ah. */
22220 arith_cost = TARGET_IEEE_FP ? 5 : 4;
22224 arith_cost = TARGET_IEEE_FP ? 6 : 4;
22227 gcc_unreachable ();
22230 switch (ix86_fp_comparison_strategy (code))
22232 case IX86_FPCMP_COMI:
22233 return arith_cost > 4 ? 3 : 2;
22234 case IX86_FPCMP_SAHF:
22235 return arith_cost > 4 ? 4 : 3;
/* Return the strategy to use for floating-point comparisons.  We assume
   that fcomi is always preferable where available, since that is also true
   when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for
   fnstsw+test).  */
22245 enum ix86_fpcmp_strategy
22246 ix86_fp_comparison_strategy (enum rtx_code)
22248 /* Do fcomi/sahf based test when profitable. */
22251 return IX86_FPCMP_COMI;
22253 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
22254 return IX86_FPCMP_SAHF;
22256 return IX86_FPCMP_ARITH;
22259 /* Swap, force into registers, or otherwise massage the two operands
22260 to a fp comparison. The operands are updated in place; the new
22261 comparison code is returned. */
22263 static enum rtx_code
22264 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
22266 bool unordered_compare = ix86_unordered_fp_compare (code);
22267 rtx op0 = *pop0, op1 = *pop1;
22268 machine_mode op_mode = GET_MODE (op0);
22269 bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
22271 /* All of the unordered compare instructions only work on registers.
22272 The same is true of the fcomi compare instructions. The XFmode
22273 compare instructions require registers except when comparing
22274 against zero or when converting operand 1 from fixed point to floating point. */
22278 && (unordered_compare
22279 || (op_mode == XFmode
22280 && ! (standard_80387_constant_p (op0) == 1
22281 || standard_80387_constant_p (op1) == 1)
22282 && GET_CODE (op1) != FLOAT)
22283 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
22285 op0 = force_reg (op_mode, op0);
22286 op1 = force_reg (op_mode, op1);
22290 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
22291 things around if they appear profitable, otherwise force op0
22292 into a register. */
22294 if (standard_80387_constant_p (op0) == 0
22296 && ! (standard_80387_constant_p (op1) == 0
22299 enum rtx_code new_code = ix86_fp_swap_condition (code);
22300 if (new_code != UNKNOWN)
22302 std::swap (op0, op1);
22308 op0 = force_reg (op_mode, op0);
22310 if (CONSTANT_P (op1))
22312 int tmp = standard_80387_constant_p (op1);
22314 op1 = validize_mem (force_const_mem (op_mode, op1));
22318 op1 = force_reg (op_mode, op1);
22321 op1 = force_reg (op_mode, op1);
22325 /* Try to rearrange the comparison to make it cheaper. */
22326 if (ix86_fp_comparison_cost (code)
22327 > ix86_fp_comparison_cost (swap_condition (code))
22328 && (REG_P (op1) || can_create_pseudo_p ()))
22330 std::swap (op0, op1);
22331 code = swap_condition (code);
22333 op0 = force_reg (op_mode, op0);
22341 /* Convert the comparison codes we use to represent FP comparisons to an
22342 integer code that will result in a proper branch. Return UNKNOWN if no such code is available. */
22346 ix86_fp_compare_code_to_integer (enum rtx_code code)
22370 /* Generate insn patterns to do a floating point compare of OPERANDS. */
22373 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1)
22375 bool unordered_compare = ix86_unordered_fp_compare (code);
22376 machine_mode cmp_mode;
22379 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
22381 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22382 if (unordered_compare)
22383 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22385 /* Do fcomi/sahf based test when profitable. */
22386 switch (ix86_fp_comparison_strategy (code))
22388 case IX86_FPCMP_COMI:
22389 cmp_mode = CCFPmode;
22390 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp));
22393 case IX86_FPCMP_SAHF:
22394 cmp_mode = CCFPmode;
22395 tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
22396 scratch = gen_reg_rtx (HImode);
22397 emit_insn (gen_rtx_SET (scratch, tmp));
22398 emit_insn (gen_x86_sahf_1 (scratch));
22401 case IX86_FPCMP_ARITH:
22402 cmp_mode = CCNOmode;
22403 tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
22404 scratch = gen_reg_rtx (HImode);
22405 emit_insn (gen_rtx_SET (scratch, tmp));
22407 /* In the unordered case, we have to check C2 for NaNs, which
22408 doesn't happen to work out to anything nice combination-wise.
22409 So do some bit twiddling on the value we've got in AH to come
22410 up with an appropriate set of condition codes. */
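/* After fnstsw, the relevant FPU condition bits land in %ah as
   C0 = 0x01, C2 = 0x04 and C3 = 0x40; an unordered result sets all
   three, which is where the 0x45 masks below come from.  */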
22416 if (code == GT || !TARGET_IEEE_FP)
22418 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22423 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22424 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22425 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
22432 if (code == LT && TARGET_IEEE_FP)
22434 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22435 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
22441 emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx));
22447 if (code == GE || !TARGET_IEEE_FP)
22449 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05)));
22454 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22455 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx));
22461 if (code == LE && TARGET_IEEE_FP)
22463 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22464 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22465 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22471 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22477 if (code == EQ && TARGET_IEEE_FP)
22479 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22480 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22486 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
22492 if (code == NE && TARGET_IEEE_FP)
22494 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22495 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, GEN_INT (0x40)));
22501 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
22507 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
22511 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
22516 gcc_unreachable ();
22524 /* Return the test that should be put into the flags user, i.e.
22525 the bcc, scc, or cmov instruction. */
22526 return gen_rtx_fmt_ee (code, VOIDmode,
22527 gen_rtx_REG (cmp_mode, FLAGS_REG),
22532 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
22536 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
22537 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
22539 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
22541 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
22542 ret = ix86_expand_fp_compare (code, op0, op1);
22545 ret = ix86_expand_int_compare (code, op0, op1);
22551 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
22553 machine_mode mode = GET_MODE (op0);
22556 /* Handle the special case of a vector comparison with a boolean result;
22557 transform it using the ptest instruction. */
22558 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22560 rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
22561 machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
22563 gcc_assert (code == EQ || code == NE);
22564 /* Generate an XOR since we can't check that one operand is a zero vector. */
22565 tmp = gen_reg_rtx (mode);
22566 emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
22567 tmp = gen_lowpart (p_mode, tmp);
22568 emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
22569 gen_rtx_UNSPEC (CCmode,
22570 gen_rtvec (2, tmp, tmp),
22572 tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
22573 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22574 gen_rtx_LABEL_REF (VOIDmode, label),
22576 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
22589 tmp = ix86_expand_compare (code, op0, op1);
22590 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22591 gen_rtx_LABEL_REF (VOIDmode, label),
22593 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
22599 /* For a 32-bit target, a DImode comparison may be performed in
22600 SSE registers. To allow this we should avoid the split
22601 into SImode, which is achieved by doing the xor in DImode
22602 and then comparing with zero (which is recognized by the
22603 STV pass). We don't compare using xor when optimizing for size. */
22605 if (!optimize_insn_for_size_p ()
22607 && (code == EQ || code == NE))
22609 op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
22614 /* Expand DImode branch into multiple compare+branch. */
22617 rtx_code_label *label2;
22618 enum rtx_code code1, code2, code3;
22619 machine_mode submode;
22621 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
22623 std::swap (op0, op1);
22624 code = swap_condition (code);
22627 split_double_mode (mode, &op0, 1, lo+0, hi+0);
22628 split_double_mode (mode, &op1, 1, lo+1, hi+1);
22630 submode = mode == DImode ? SImode : DImode;
22632 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
22633 avoid two branches. This costs one extra insn, so disable when
22634 optimizing for size. */
22636 if ((code == EQ || code == NE)
22637 && (!optimize_insn_for_size_p ()
22638 || hi[1] == const0_rtx || lo[1] == const0_rtx))
22643 if (hi[1] != const0_rtx)
22644 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
22645 NULL_RTX, 0, OPTAB_WIDEN);
22648 if (lo[1] != const0_rtx)
22649 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
22650 NULL_RTX, 0, OPTAB_WIDEN);
22652 tmp = expand_binop (submode, ior_optab, xor1, xor0,
22653 NULL_RTX, 0, OPTAB_WIDEN);
22655 ix86_expand_branch (code, tmp, const0_rtx, label);
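/* For a 32-bit DImode "a == b" this yields, roughly:
     movl lo(a), %eax ; xorl lo(b), %eax
     movl hi(a), %edx ; xorl hi(b), %edx
     orl  %edx, %eax  ; jz/jnz label  */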
22659 /* Otherwise, if we are doing a less-than or greater-than-or-equal
22660 comparison, op1 is a constant and the low word is zero, then we can
22661 just examine the high word. Similarly for a low word of -1 and
22662 less-than-or-equal or greater-than. */
22664 if (CONST_INT_P (hi[1]))
22667 case LT: case LTU: case GE: case GEU:
22668 if (lo[1] == const0_rtx)
22670 ix86_expand_branch (code, hi[0], hi[1], label);
22674 case LE: case LEU: case GT: case GTU:
22675 if (lo[1] == constm1_rtx)
22677 ix86_expand_branch (code, hi[0], hi[1], label);
22685 /* Emulate comparisons that do not depend on the Zero flag with
22686 double-word subtraction. Note that only the Overflow, Sign
22687 and Carry flags are valid, so swap the arguments and condition
22688 of comparisons that would otherwise test the Zero flag. */
22692 case LE: case LEU: case GT: case GTU:
22693 std::swap (lo[0], lo[1]);
22694 std::swap (hi[0], hi[1]);
22695 code = swap_condition (code);
22698 case LT: case LTU: case GE: case GEU:
22700 rtx (*cmp_insn) (rtx, rtx);
22701 rtx (*sbb_insn) (rtx, rtx, rtx);
22702 bool uns = (code == LTU || code == GEU);
22706 cmp_insn = gen_cmpdi_1;
22708 = uns ? gen_subdi3_carry_ccc : gen_subdi3_carry_ccgz;
22712 cmp_insn = gen_cmpsi_1;
22714 = uns ? gen_subsi3_carry_ccc : gen_subsi3_carry_ccgz;
22717 if (!nonimmediate_operand (lo[0], submode))
22718 lo[0] = force_reg (submode, lo[0]);
22719 if (!x86_64_general_operand (lo[1], submode))
22720 lo[1] = force_reg (submode, lo[1]);
22722 if (!register_operand (hi[0], submode))
22723 hi[0] = force_reg (submode, hi[0]);
22724 if ((uns && !nonimmediate_operand (hi[1], submode))
22725 || (!uns && !x86_64_general_operand (hi[1], submode)))
22726 hi[1] = force_reg (submode, hi[1]);
22728 emit_insn (cmp_insn (lo[0], lo[1]));
22729 emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1]));
22731 tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
22733 ix86_expand_branch (code, tmp, const0_rtx, label);
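/* The emitted sequence is essentially "cmp lo ; sbb hi" with the
   high-part result discarded, e.g. roughly for unsigned "a < b":
     cmpl  lo(b), lo(a)      ; set CF from lo(a) - lo(b)
     sbbl  hi(b), hi(a)      ; result thrown away, flags kept
     jc    label  */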
22741 /* Otherwise, we need two or three jumps. */
22743 label2 = gen_label_rtx ();
22746 code2 = swap_condition (code);
22747 code3 = unsigned_condition (code);
22751 case LT: case GT: case LTU: case GTU:
22754 case LE: code1 = LT; code2 = GT; break;
22755 case GE: code1 = GT; code2 = LT; break;
22756 case LEU: code1 = LTU; code2 = GTU; break;
22757 case GEU: code1 = GTU; code2 = LTU; break;
22759 case EQ: code1 = UNKNOWN; code2 = NE; break;
22760 case NE: code2 = UNKNOWN; break;
22763 gcc_unreachable ();
22768 * if (hi(a) < hi(b)) goto true;
22769 * if (hi(a) > hi(b)) goto false;
22770 * if (lo(a) < lo(b)) goto true;
22774 if (code1 != UNKNOWN)
22775 ix86_expand_branch (code1, hi[0], hi[1], label);
22776 if (code2 != UNKNOWN)
22777 ix86_expand_branch (code2, hi[0], hi[1], label2);
22779 ix86_expand_branch (code3, lo[0], lo[1], label);
22781 if (code2 != UNKNOWN)
22782 emit_label (label2);
22787 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
22793 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22797 gcc_assert (GET_MODE (dest) == QImode);
22799 ret = ix86_expand_compare (code, op0, op1);
22800 PUT_MODE (ret, QImode);
22801 emit_insn (gen_rtx_SET (dest, ret));
22804 /* Expand a comparison setting or clearing the carry flag. Return true when
22805 successful and set *POP to the comparison operation. */
22807 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
22810 = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
22812 /* Do not handle double-mode compares that go through the special path. */
22813 if (mode == (TARGET_64BIT ? TImode : DImode))
22816 if (SCALAR_FLOAT_MODE_P (mode))
22819 rtx_insn *compare_seq;
22821 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
22823 /* Shortcut: the following common codes never translate
22824 into carry flag compares. */
22825 if (code == EQ || code == NE || code == UNEQ || code == LTGT
22826 || code == ORDERED || code == UNORDERED)
22829 /* These comparisons require the zero flag; swap the operands so they won't need it. */
22830 if ((code == GT || code == UNLE || code == LE || code == UNGT)
22831 && !TARGET_IEEE_FP)
22833 std::swap (op0, op1);
22834 code = swap_condition (code);
22837 /* Try to expand the comparison and verify that we end up with a
22838 carry-flag-based comparison. This fails to be true only when
22839 we decide to expand the comparison using arithmetic, which is
22840 not a common scenario. */
22842 compare_op = ix86_expand_fp_compare (code, op0, op1);
22843 compare_seq = get_insns ();
22846 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode)
22847 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
22849 code = GET_CODE (compare_op);
22851 if (code != LTU && code != GEU)
22854 emit_insn (compare_seq);
22859 if (!INTEGRAL_MODE_P (mode))
22868 /* Convert a==0 into (unsigned)a<1. */
22871 if (op1 != const0_rtx)
22874 code = (code == EQ ? LTU : GEU);
22877 /* Convert a>b into b<a or a>=b+1. */
22880 if (CONST_INT_P (op1))
22882 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
22883 /* Bail out on overflow. We can still swap the operands, but that
22884 would force loading of the constant into a register. */
22885 if (op1 == const0_rtx
22886 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
22888 code = (code == GTU ? GEU : LTU);
22892 std::swap (op0, op1);
22893 code = (code == GTU ? LTU : GEU);
22897 /* Convert a>=0 into (unsigned)a<0x80000000. */
22900 if (mode == DImode || op1 != const0_rtx)
22902 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22903 code = (code == LT ? GEU : LTU);
22907 if (mode == DImode || op1 != constm1_rtx)
22909 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22910 code = (code == LE ? GEU : LTU);
22916 /* Swapping the operands may cause a constant to appear as the first operand. */
22917 if (!nonimmediate_operand (op0, VOIDmode))
22919 if (!can_create_pseudo_p ())
22921 op0 = force_reg (mode, op0);
22923 *pop = ix86_expand_compare (code, op0, op1);
22924 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
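/* Example of the conversions above: "a == 0" becomes the unsigned
   test "a < 1", i.e. "cmpl $1, a", which sets the carry flag exactly
   when a == 0 and can therefore feed sbb/adc based sequences.  */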
22929 ix86_expand_int_movcc (rtx operands[])
22931 enum rtx_code code = GET_CODE (operands[1]), compare_code;
22932 rtx_insn *compare_seq;
22934 machine_mode mode = GET_MODE (operands[0]);
22935 bool sign_bit_compare_p = false;
22936 rtx op0 = XEXP (operands[1], 0);
22937 rtx op1 = XEXP (operands[1], 1);
22939 if (GET_MODE (op0) == TImode
22940 || (GET_MODE (op0) == DImode
22945 compare_op = ix86_expand_compare (code, op0, op1);
22946 compare_seq = get_insns ();
22949 compare_code = GET_CODE (compare_op);
22951 if ((op1 == const0_rtx && (code == GE || code == LT))
22952 || (op1 == constm1_rtx && (code == GT || code == LE)))
22953 sign_bit_compare_p = true;
22955 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
22956 HImode insns, we'd be swallowed in word prefix ops. */
22958 if ((mode != HImode || TARGET_FAST_PREFIX)
22959 && (mode != (TARGET_64BIT ? TImode : DImode))
22960 && CONST_INT_P (operands[2])
22961 && CONST_INT_P (operands[3]))
22963 rtx out = operands[0];
22964 HOST_WIDE_INT ct = INTVAL (operands[2]);
22965 HOST_WIDE_INT cf = INTVAL (operands[3]);
22966 HOST_WIDE_INT diff;
22969 /* Sign bit compares are better done using shifts than by using sbb. */
22971 if (sign_bit_compare_p
22972 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22974 /* Detect overlap between destination and compare sources. */
22977 if (!sign_bit_compare_p)
22980 bool fpcmp = false;
22982 compare_code = GET_CODE (compare_op);
22984 flags = XEXP (compare_op, 0);
22986 if (GET_MODE (flags) == CCFPmode)
22990 = ix86_fp_compare_code_to_integer (compare_code);
22993 /* To simplify the rest of the code, restrict to the GEU case. */
22994 if (compare_code == LTU)
22996 std::swap (ct, cf);
22997 compare_code = reverse_condition (compare_code);
22998 code = reverse_condition (code);
23003 PUT_CODE (compare_op,
23004 reverse_condition_maybe_unordered
23005 (GET_CODE (compare_op)));
23007 PUT_CODE (compare_op,
23008 reverse_condition (GET_CODE (compare_op)));
23012 if (reg_overlap_mentioned_p (out, op0)
23013 || reg_overlap_mentioned_p (out, op1))
23014 tmp = gen_reg_rtx (mode);
23016 if (mode == DImode)
23017 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
23019 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
23020 flags, compare_op));
23024 if (code == GT || code == GE)
23025 code = reverse_condition (code);
23028 std::swap (ct, cf);
23031 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
23044 tmp = expand_simple_binop (mode, PLUS,
23046 copy_rtx (tmp), 1, OPTAB_DIRECT);
23057 tmp = expand_simple_binop (mode, IOR,
23059 copy_rtx (tmp), 1, OPTAB_DIRECT);
23061 else if (diff == -1 && ct)
23071 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
23073 tmp = expand_simple_binop (mode, PLUS,
23074 copy_rtx (tmp), GEN_INT (cf),
23075 copy_rtx (tmp), 1, OPTAB_DIRECT);
23083 * andl cf - ct, dest
23093 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
23096 tmp = expand_simple_binop (mode, AND,
23098 gen_int_mode (cf - ct, mode),
23099 copy_rtx (tmp), 1, OPTAB_DIRECT);
23101 tmp = expand_simple_binop (mode, PLUS,
23102 copy_rtx (tmp), GEN_INT (ct),
23103 copy_rtx (tmp), 1, OPTAB_DIRECT);
23106 if (!rtx_equal_p (tmp, out))
23107 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
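/* Taken together, the above yields the classic branchless select,
   roughly:
     cmpl  op1, op0
     sbbl  %eax, %eax        ; %eax = -1 if carry, else 0
     andl  $(cf - ct), %eax
     addl  $ct, %eax  */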
23114 machine_mode cmp_mode = GET_MODE (op0);
23115 enum rtx_code new_code;
23117 if (SCALAR_FLOAT_MODE_P (cmp_mode))
23119 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
23121 /* We may be reversing an unordered compare to a normal compare, which
23122 is not valid in general (we may convert a non-trapping condition
23123 to a trapping one); however, on i386 we currently emit all
23124 comparisons unordered. */
23125 new_code = reverse_condition_maybe_unordered (code);
23128 new_code = ix86_reverse_condition (code, cmp_mode);
23129 if (new_code != UNKNOWN)
23131 std::swap (ct, cf);
23137 compare_code = UNKNOWN;
23138 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
23139 && CONST_INT_P (op1))
23141 if (op1 == const0_rtx
23142 && (code == LT || code == GE))
23143 compare_code = code;
23144 else if (op1 == constm1_rtx)
23148 else if (code == GT)
23153 /* Optimize dest = (op0 < 0) ? -1 : cf. */
23154 if (compare_code != UNKNOWN
23155 && GET_MODE (op0) == GET_MODE (out)
23156 && (cf == -1 || ct == -1))
23158 /* If the lea code below could be used, only optimize
23159 if it results in a two-insn sequence. */
23161 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
23162 || diff == 3 || diff == 5 || diff == 9)
23163 || (compare_code == LT && ct == -1)
23164 || (compare_code == GE && cf == -1))
23167 * notl op1 (if necessary)
23175 code = reverse_condition (code);
23178 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23180 out = expand_simple_binop (mode, IOR,
23182 out, 1, OPTAB_DIRECT);
23183 if (out != operands[0])
23184 emit_move_insn (operands[0], out);
23191 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
23192 || diff == 3 || diff == 5 || diff == 9)
23193 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
23195 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
23201 * lea cf(dest*(ct-cf)),dest
23205 * This also catches the degenerate setcc-only case.
23211 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23214 /* On x86_64 the lea instruction operates on Pmode, so we need
23215 to get the arithmetic done in the proper mode to match. */
23217 tmp = copy_rtx (out);
23221 out1 = copy_rtx (out);
23222 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
23226 tmp = gen_rtx_PLUS (mode, tmp, out1);
23232 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
23235 if (!rtx_equal_p (tmp, out))
23238 out = force_operand (tmp, copy_rtx (out));
23240 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
23242 if (!rtx_equal_p (out, operands[0]))
23243 emit_move_insn (operands[0], copy_rtx (out));
23249 * General case: Jumpful:
23250 * xorl dest,dest cmpl op1, op2
23251 * cmpl op1, op2 movl ct, dest
23252 * setcc dest jcc 1f
23253 * decl dest movl cf, dest
23254 * andl (cf-ct),dest 1:
23257 * Size 20. Size 14.
23259 * This is reasonably steep, but branch mispredict costs are
23260 * high on modern CPUs, so consider failing only if optimizing for space. */
23264 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23265 && BRANCH_COST (optimize_insn_for_speed_p (),
23270 machine_mode cmp_mode = GET_MODE (op0);
23271 enum rtx_code new_code;
23273 if (SCALAR_FLOAT_MODE_P (cmp_mode))
23275 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
23277 /* We may be reversing an unordered compare to a normal compare,
23278 which is not valid in general (we may convert a non-trapping
23279 condition to a trapping one); however, on i386 we currently
23280 emit all comparisons unordered. */
23281 new_code = reverse_condition_maybe_unordered (code);
23285 new_code = ix86_reverse_condition (code, cmp_mode);
23286 if (compare_code != UNKNOWN && new_code != UNKNOWN)
23287 compare_code = reverse_condition (compare_code);
23290 if (new_code != UNKNOWN)
23298 if (compare_code != UNKNOWN)
23300 /* notl op1 (if needed)
23305 For x < 0 (resp. x <= -1) there will be no notl,
23306 so if possible swap the constants to get rid of the complement.
23308 True/false will be -1/0 while the code below (store flag
23309 followed by decrement) is 0/-1, so the constants need
23310 to be exchanged once more. */
23312 if (compare_code == GE || !cf)
23314 code = reverse_condition (code);
23318 std::swap (ct, cf);
23320 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23324 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23326 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
23328 copy_rtx (out), 1, OPTAB_DIRECT);
23331 out = expand_simple_binop (mode, AND, copy_rtx (out),
23332 gen_int_mode (cf - ct, mode),
23333 copy_rtx (out), 1, OPTAB_DIRECT);
23335 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
23336 copy_rtx (out), 1, OPTAB_DIRECT);
23337 if (!rtx_equal_p (out, operands[0]))
23338 emit_move_insn (operands[0], copy_rtx (out));
23344 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23346 /* Try a few things more with specific constants and a variable. */
23349 rtx var, orig_out, out, tmp;
23351 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
23354 /* If one of the two operands is an interesting constant, load a
23355 constant with the above and mask it in with a logical operation. */
23357 if (CONST_INT_P (operands[2]))
23360 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
23361 operands[3] = constm1_rtx, op = and_optab;
23362 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
23363 operands[3] = const0_rtx, op = ior_optab;
23367 else if (CONST_INT_P (operands[3]))
23370 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
23371 operands[2] = constm1_rtx, op = and_optab;
23372 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
23373 operands[2] = const0_rtx, op = ior_optab;
23380 orig_out = operands[0];
23381 tmp = gen_reg_rtx (mode);
23384 /* Recurse to get the constant loaded. */
23385 if (!ix86_expand_int_movcc (operands))
23388 /* Mask in the interesting variable. */
23389 out = expand_binop (mode, op, var, tmp, orig_out, 0,
23391 if (!rtx_equal_p (out, orig_out))
23392 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
23398 * For comparison with above,
23408 if (! nonimmediate_operand (operands[2], mode))
23409 operands[2] = force_reg (mode, operands[2]);
23410 if (! nonimmediate_operand (operands[3], mode))
23411 operands[3] = force_reg (mode, operands[3]);
23413 if (! register_operand (operands[2], VOIDmode)
23415 || ! register_operand (operands[3], VOIDmode)))
23416 operands[2] = force_reg (mode, operands[2]);
23419 && ! register_operand (operands[3], VOIDmode))
23420 operands[3] = force_reg (mode, operands[3]);
23422 emit_insn (compare_seq);
23423 emit_insn (gen_rtx_SET (operands[0],
23424 gen_rtx_IF_THEN_ELSE (mode,
23425 compare_op, operands[2],
23430 /* Swap, force into registers, or otherwise massage the two operands
23431 to an SSE comparison with a mask result. Thus we differ a bit from
23432 ix86_prepare_fp_compare_args, which expects to produce a flags result.
23434 The DEST operand exists to help determine whether to commute commutative
23435 operators. The POP0/POP1 operands are updated in place. The new
23436 comparison code is returned, or UNKNOWN if not implementable. */
23438 static enum rtx_code
23439 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
23440 rtx *pop0, rtx *pop1)
23446 /* AVX supports all the needed comparisons. */
23449 /* We have no LTGT as an operator. We could implement it with
23450 NE & ORDERED, but this requires an extra temporary. It's
23451 not clear that it's worth it. */
23458 /* These are supported directly. */
23465 /* AVX has 3-operand comparisons, no need to swap anything. */
23468 /* For commutative operators, try to canonicalize the destination
23469 operand to be first in the comparison - this helps reload to
23470 avoid extra moves. */
23471 if (!dest || !rtx_equal_p (dest, *pop1))
23479 /* These are not supported directly before AVX, and furthermore
23480 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
23481 comparison operands to transform into something that is supported. */
23483 std::swap (*pop0, *pop1);
23484 code = swap_condition (code);
23488 gcc_unreachable ();
23494 /* Detect conditional moves that exactly match min/max operational
23495 semantics. Note that this is IEEE safe, as long as we don't
23496 interchange the operands.
23498 Returns FALSE if this conditional move doesn't match a MIN/MAX,
23499 and TRUE if the operation is successful and instructions are emitted. */
23502 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
23503 rtx cmp_op1, rtx if_true, rtx if_false)
23511 else if (code == UNGE)
23512 std::swap (if_true, if_false);
23516 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
23518 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
23523 mode = GET_MODE (dest);
23525 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
23526 but MODE may be a vector mode and thus not appropriate. */
23527 if (!flag_finite_math_only || flag_signed_zeros)
23529 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
23532 if_true = force_reg (mode, if_true);
23533 v = gen_rtvec (2, if_true, if_false);
23534 tmp = gen_rtx_UNSPEC (mode, v, u);
23538 code = is_min ? SMIN : SMAX;
23539 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
23542 emit_insn (gen_rtx_SET (dest, tmp));
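/* minss/maxss are not commutative: when the operands compare unordered
   (or are zeros of opposite sign) the hardware returns the second
   source operand, which is why the operand order established above
   must be preserved for IEEE safety.  */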
23546 /* Expand an SSE comparison. Return the register with the result. */
23549 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
23550 rtx op_true, rtx op_false)
23552 machine_mode mode = GET_MODE (dest);
23553 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
23555 /* In the general case the result of the comparison can differ from the operands' type. */
23556 machine_mode cmp_mode;
23558 /* In AVX512F the result of comparison is an integer mask. */
23559 bool maskcmp = false;
23562 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
23564 unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
23565 cmp_mode = int_mode_for_size (nbits, 0).require ();
23569 cmp_mode = cmp_ops_mode;
23571 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
23573 int (*op1_predicate)(rtx, machine_mode)
23574 = VECTOR_MODE_P (cmp_ops_mode) ? vector_operand : nonimmediate_operand;
23576 if (!op1_predicate (cmp_op1, cmp_ops_mode))
23577 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
23580 || (maskcmp && cmp_mode != mode)
23581 || (op_true && reg_overlap_mentioned_p (dest, op_true))
23582 || (op_false && reg_overlap_mentioned_p (dest, op_false)))
23583 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
23585 /* Compare patterns for int modes are unspec in AVX512F only. */
23586 if (maskcmp && (code == GT || code == EQ))
23588 rtx (*gen)(rtx, rtx, rtx);
23590 switch (cmp_ops_mode)
23593 gcc_assert (TARGET_AVX512BW);
23594 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
23597 gcc_assert (TARGET_AVX512BW);
23598 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
23601 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
23604 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
23612 emit_insn (gen (dest, cmp_op0, cmp_op1));
23616 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
23618 if (cmp_mode != mode && !maskcmp)
23620 x = force_reg (cmp_ops_mode, x);
23621 convert_move (dest, x, false);
23624 emit_insn (gen_rtx_SET (dest, x));
23629 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
23630 operations. This is used for both scalar and vector conditional moves. */
23633 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
23635 machine_mode mode = GET_MODE (dest);
23636 machine_mode cmpmode = GET_MODE (cmp);
23638 /* In AVX512F the result of comparison is an integer mask. */
23639 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
23643 /* If we have an integer mask and an FP value then we need
23644 to cast the mask to the FP mode. */
23645 if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
23647 cmp = force_reg (cmpmode, cmp);
23648 cmp = gen_rtx_SUBREG (mode, cmp, 0);
23653 rtx (*gen) (rtx, rtx) = NULL;
23654 if ((op_true == CONST0_RTX (mode)
23655 && vector_all_ones_operand (op_false, mode))
23656 || (op_false == CONST0_RTX (mode)
23657 && vector_all_ones_operand (op_true, mode)))
23661 if (TARGET_AVX512BW)
23662 gen = gen_avx512bw_cvtmask2bv64qi;
23665 if (TARGET_AVX512VL && TARGET_AVX512BW)
23666 gen = gen_avx512vl_cvtmask2bv32qi;
23669 if (TARGET_AVX512VL && TARGET_AVX512BW)
23670 gen = gen_avx512vl_cvtmask2bv16qi;
23673 if (TARGET_AVX512BW)
23674 gen = gen_avx512bw_cvtmask2wv32hi;
23677 if (TARGET_AVX512VL && TARGET_AVX512BW)
23678 gen = gen_avx512vl_cvtmask2wv16hi;
23681 if (TARGET_AVX512VL && TARGET_AVX512BW)
23682 gen = gen_avx512vl_cvtmask2wv8hi;
23685 if (TARGET_AVX512DQ)
23686 gen = gen_avx512f_cvtmask2dv16si;
23689 if (TARGET_AVX512VL && TARGET_AVX512DQ)
23690 gen = gen_avx512vl_cvtmask2dv8si;
23693 if (TARGET_AVX512VL && TARGET_AVX512DQ)
23694 gen = gen_avx512vl_cvtmask2dv4si;
23697 if (TARGET_AVX512DQ)
23698 gen = gen_avx512f_cvtmask2qv8di;
23701 if (TARGET_AVX512VL && TARGET_AVX512DQ)
23702 gen = gen_avx512vl_cvtmask2qv4di;
23705 if (TARGET_AVX512VL && TARGET_AVX512DQ)
23706 gen = gen_avx512vl_cvtmask2qv2di;
23711 if (gen && SCALAR_INT_MODE_P (cmpmode))
23713 cmp = force_reg (cmpmode, cmp);
23714 if (op_true == CONST0_RTX (mode))
23716 rtx (*gen_not) (rtx, rtx);
23719 case E_QImode: gen_not = gen_knotqi; break;
23720 case E_HImode: gen_not = gen_knothi; break;
23721 case E_SImode: gen_not = gen_knotsi; break;
23722 case E_DImode: gen_not = gen_knotdi; break;
23723 default: gcc_unreachable ();
23725 rtx n = gen_reg_rtx (cmpmode);
23726 emit_insn (gen_not (n, cmp));
23729 emit_insn (gen (dest, cmp));
23733 else if (vector_all_ones_operand (op_true, mode)
23734 && op_false == CONST0_RTX (mode))
23736 emit_insn (gen_rtx_SET (dest, cmp));
23739 else if (op_false == CONST0_RTX (mode))
23741 op_true = force_reg (mode, op_true);
23742 x = gen_rtx_AND (mode, cmp, op_true);
23743 emit_insn (gen_rtx_SET (dest, x));
23746 else if (op_true == CONST0_RTX (mode))
23748 op_false = force_reg (mode, op_false);
23749 x = gen_rtx_NOT (mode, cmp);
23750 x = gen_rtx_AND (mode, x, op_false);
23751 emit_insn (gen_rtx_SET (dest, x));
23754 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
23756 op_false = force_reg (mode, op_false);
23757 x = gen_rtx_IOR (mode, cmp, op_false);
23758 emit_insn (gen_rtx_SET (dest, x));
23761 else if (TARGET_XOP)
23763 op_true = force_reg (mode, op_true);
23765 if (!nonimmediate_operand (op_false, mode))
23766 op_false = force_reg (mode, op_false);
23768 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
23774 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
23777 if (!vector_operand (op_true, mode))
23778 op_true = force_reg (mode, op_true);
23780 op_false = force_reg (mode, op_false);
23786 gen = gen_sse4_1_blendvps;
23790 gen = gen_sse4_1_blendvpd;
23795 gen = gen_sse4_1_blendvss;
23796 op_true = force_reg (mode, op_true);
23802 gen = gen_sse4_1_blendvsd;
23803 op_true = force_reg (mode, op_true);
23812 gen = gen_sse4_1_pblendvb;
23813 if (mode != V16QImode)
23814 d = gen_reg_rtx (V16QImode);
23815 op_false = gen_lowpart (V16QImode, op_false);
23816 op_true = gen_lowpart (V16QImode, op_true);
23817 cmp = gen_lowpart (V16QImode, cmp);
23822 gen = gen_avx_blendvps256;
23826 gen = gen_avx_blendvpd256;
23834 gen = gen_avx2_pblendvb;
23835 if (mode != V32QImode)
23836 d = gen_reg_rtx (V32QImode);
23837 op_false = gen_lowpart (V32QImode, op_false);
23838 op_true = gen_lowpart (V32QImode, op_true);
23839 cmp = gen_lowpart (V32QImode, cmp);
23844 gen = gen_avx512bw_blendmv64qi;
23847 gen = gen_avx512bw_blendmv32hi;
23850 gen = gen_avx512f_blendmv16si;
23853 gen = gen_avx512f_blendmv8di;
23856 gen = gen_avx512f_blendmv8df;
23859 gen = gen_avx512f_blendmv16sf;
23868 emit_insn (gen (d, op_false, op_true, cmp));
23870 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
23874 op_true = force_reg (mode, op_true);
23876 t2 = gen_reg_rtx (mode);
23878 t3 = gen_reg_rtx (mode);
23882 x = gen_rtx_AND (mode, op_true, cmp);
23883 emit_insn (gen_rtx_SET (t2, x));
23885 x = gen_rtx_NOT (mode, cmp);
23886 x = gen_rtx_AND (mode, x, op_false);
23887 emit_insn (gen_rtx_SET (t3, x));
23889 x = gen_rtx_IOR (mode, t3, t2);
23890 emit_insn (gen_rtx_SET (dest, x));
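/* This fallback computes the standard blend identity
     dest = (cmp & op_true) | (~cmp & op_false)
   which on SSE maps to something like andps/andnps/orps.  */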
23894 /* Expand a floating-point conditional move. Return true if successful. */
23897 ix86_expand_fp_movcc (rtx operands[])
23899 machine_mode mode = GET_MODE (operands[0]);
23900 enum rtx_code code = GET_CODE (operands[1]);
23901 rtx tmp, compare_op;
23902 rtx op0 = XEXP (operands[1], 0);
23903 rtx op1 = XEXP (operands[1], 1);
23905 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
23907 machine_mode cmode;
23909 /* Since we have no cmove for SSE registers, don't force bad register
23910 allocation just to gain access to it. Deny movcc when the
23911 comparison mode doesn't match the move mode. */
23912 cmode = GET_MODE (op0);
23913 if (cmode == VOIDmode)
23914 cmode = GET_MODE (op1);
23918 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
23919 if (code == UNKNOWN)
23922 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
23923 operands[2], operands[3]))
23926 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
23927 operands[2], operands[3]);
23928 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
23932 if (GET_MODE (op0) == TImode
23933 || (GET_MODE (op0) == DImode
23937 /* The floating point conditional move instructions don't directly
23938 support conditions resulting from a signed integer comparison. */
23940 compare_op = ix86_expand_compare (code, op0, op1);
23941 if (!fcmov_comparison_operator (compare_op, VOIDmode))
23943 tmp = gen_reg_rtx (QImode);
23944 ix86_expand_setcc (tmp, code, op0, op1);
23946 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
23949 emit_insn (gen_rtx_SET (operands[0],
23950 gen_rtx_IF_THEN_ELSE (mode, compare_op,
23951 operands[2], operands[3])));
23956 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
23959 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
23980 gcc_unreachable ();
23984 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
23987 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
24020 gcc_unreachable ();
24024 /* Return immediate value to be used in UNSPEC_PCMP
24025 for comparison CODE in MODE. */
24028 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
24030 if (FLOAT_MODE_P (mode))
24031 return ix86_fp_cmp_code_to_pcmp_immediate (code);
24032 return ix86_int_cmp_code_to_pcmp_immediate (code);
24035 /* Expand AVX-512 vector comparison. */
24038 ix86_expand_mask_vec_cmp (rtx operands[])
24040 machine_mode mask_mode = GET_MODE (operands[0]);
24041 machine_mode cmp_mode = GET_MODE (operands[2]);
24042 enum rtx_code code = GET_CODE (operands[1]);
24043 rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
24053 unspec_code = UNSPEC_UNSIGNED_PCMP;
24057 unspec_code = UNSPEC_PCMP;
24060 unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
24063 emit_insn (gen_rtx_SET (operands[0], unspec));
24068 /* Expand fp vector comparison. */
24071 ix86_expand_fp_vec_cmp (rtx operands[])
24073 enum rtx_code code = GET_CODE (operands[1]);
24076 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
24077 &operands[2], &operands[3]);
24078 if (code == UNKNOWN)
24081 switch (GET_CODE (operands[1]))
24084 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
24085 operands[3], NULL, NULL);
24086 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
24087 operands[3], NULL, NULL);
24091 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
24092 operands[3], NULL, NULL);
24093 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
24094 operands[3], NULL, NULL);
24098 gcc_unreachable ();
24100 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
24104 cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
24105 operands[1], operands[2]);
24107 if (operands[0] != cmp)
24108 emit_move_insn (operands[0], cmp);
24114 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
24115 rtx op_true, rtx op_false, bool *negate)
24117 machine_mode data_mode = GET_MODE (dest);
24118 machine_mode mode = GET_MODE (cop0);
24123 /* XOP supports all of the comparisons on all 128-bit vector int types. */
24125 && (mode == V16QImode || mode == V8HImode
24126 || mode == V4SImode || mode == V2DImode))
24130 /* Canonicalize the comparison to EQ, GT, GTU. */
24141 code = reverse_condition (code);
24147 code = reverse_condition (code);
24153 std::swap (cop0, cop1);
24154 code = swap_condition (code);
24158 gcc_unreachable ();
24161 /* Only SSE4.1/SSE4.2 supports V2DImode. */
24162 if (mode == V2DImode)
24167 /* SSE4.1 supports EQ. */
24168 if (!TARGET_SSE4_1)
24174 /* SSE4.2 supports GT/GTU. */
24175 if (!TARGET_SSE4_2)
24180 gcc_unreachable ();
24184 rtx optrue = op_true ? op_true : CONSTM1_RTX (data_mode);
24185 rtx opfalse = op_false ? op_false : CONST0_RTX (data_mode);
24187 std::swap (optrue, opfalse);
24189 /* Transform x > y ? 0 : -1 (i.e. x <= y ? -1 : 0, or x <= y) when
24190 not using integer masks into min (x, y) == x ? -1 : 0 (i.e.
24191 min (x, y) == x). While we add one instruction (the minimum),
24192 we remove the need for two instructions in the negation, as the
24193 result is already produced in the desired form.
24194 When using masks, do it for SI/DImode element types, as it is shorter
24195 than the two subtractions. */
24197 && GET_MODE_SIZE (mode) != 64
24198 && vector_all_ones_operand (opfalse, data_mode)
24199 && optrue == CONST0_RTX (data_mode))
24201 && GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4
24202 /* Don't do it if not using integer masks and we'd end up with
24203 the right values in the registers though. */
24204 && (GET_MODE_SIZE (mode) == 64
24205 || !vector_all_ones_operand (optrue, data_mode)
24206 || opfalse != CONST0_RTX (data_mode))))
24208 rtx (*gen) (rtx, rtx, rtx) = NULL;
24213 gen = (code == GTU) ? gen_uminv16si3 : gen_sminv16si3;
24216 gen = (code == GTU) ? gen_uminv8di3 : gen_sminv8di3;
24217 cop0 = force_reg (mode, cop0);
24218 cop1 = force_reg (mode, cop1);
24222 gen = (code == GTU) ? gen_uminv32qi3 : gen_sminv32qi3;
24226 gen = (code == GTU) ? gen_uminv16hi3 : gen_sminv16hi3;
24230 gen = (code == GTU) ? gen_uminv8si3 : gen_sminv8si3;
24233 if (TARGET_AVX512VL)
24235 gen = (code == GTU) ? gen_uminv4di3 : gen_sminv4di3;
24236 cop0 = force_reg (mode, cop0);
24237 cop1 = force_reg (mode, cop1);
24241 if (code == GTU && TARGET_SSE2)
24242 gen = gen_uminv16qi3;
24243 else if (code == GT && TARGET_SSE4_1)
24244 gen = gen_sminv16qi3;
24247 if (code == GTU && TARGET_SSE4_1)
24248 gen = gen_uminv8hi3;
24249 else if (code == GT && TARGET_SSE2)
24250 gen = gen_sminv8hi3;
24254 gen = (code == GTU) ? gen_uminv4si3 : gen_sminv4si3;
24257 if (TARGET_AVX512VL)
24259 gen = (code == GTU) ? gen_uminv2di3 : gen_sminv2di3;
24260 cop0 = force_reg (mode, cop0);
24261 cop1 = force_reg (mode, cop1);
24270 rtx tem = gen_reg_rtx (mode);
24271 if (!vector_operand (cop0, mode))
24272 cop0 = force_reg (mode, cop0);
24273 if (!vector_operand (cop1, mode))
24274 cop1 = force_reg (mode, cop1);
24275 *negate = !*negate;
24276 emit_insn (gen (tem, cop0, cop1));
24282 /* Unsigned parallel compare is not supported by the hardware.
24283 Play some tricks to turn this into a signed comparison against 0. */
24287 cop0 = force_reg (mode, cop0);
24299 rtx (*gen_sub3) (rtx, rtx, rtx);
24303 case E_V16SImode: gen_sub3 = gen_subv16si3; break;
24304 case E_V8DImode: gen_sub3 = gen_subv8di3; break;
24305 case E_V8SImode: gen_sub3 = gen_subv8si3; break;
24306 case E_V4DImode: gen_sub3 = gen_subv4di3; break;
24307 case E_V4SImode: gen_sub3 = gen_subv4si3; break;
24308 case E_V2DImode: gen_sub3 = gen_subv2di3; break;
24310 gcc_unreachable ();
24312 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
24314 mask = ix86_build_signbit_mask (mode, true, false);
24315 t1 = gen_reg_rtx (mode);
24316 emit_insn (gen_sub3 (t1, cop0, mask));
24318 t2 = gen_reg_rtx (mode);
24319 emit_insn (gen_sub3 (t2, cop1, mask));
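/* Subtracting the sign-bit mask (a vector of INT_MIN values) flips the
   sign bit of every element, so an unsigned comparison of the originals
   is equivalent to a signed comparison of t1 and t2; e.g. unsigned
   "a > b" on V4SImode becomes signed
   "(a ^ 0x80000000) > (b ^ 0x80000000)".  */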
24333 /* Perform a parallel unsigned saturating subtraction. */
24334 x = gen_reg_rtx (mode);
24335 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
24339 cop1 = CONST0_RTX (mode);
24341 *negate = !*negate;
24345 gcc_unreachable ();
24351 std::swap (op_true, op_false);
24353 /* Allow the comparison to be done in one mode, but the movcc to
24354 happen in another mode. */
24355 if (data_mode == mode)
24357 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
24358 op_true, op_false);
24362 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
24363 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
24364 op_true, op_false);
24365 if (GET_MODE (x) == mode)
24366 x = gen_lowpart (data_mode, x);
24372 /* Expand integer vector comparison. */
24375 ix86_expand_int_vec_cmp (rtx operands[])
24377 rtx_code code = GET_CODE (operands[1]);
24378 bool negate = false;
24379 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
24380 operands[3], NULL, NULL, &negate);
24386 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
24387 CONST0_RTX (GET_MODE (cmp)),
24388 NULL, NULL, &negate);
24390 gcc_assert (!negate);
24392 if (operands[0] != cmp)
24393 emit_move_insn (operands[0], cmp);
24398 /* Expand a floating-point vector conditional move; a vcond operation
24399 rather than a movcc operation. */
24402 ix86_expand_fp_vcond (rtx operands[])
24404 enum rtx_code code = GET_CODE (operands[3]);
24407 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
24408 &operands[4], &operands[5]);
24409 if (code == UNKNOWN)
24412 switch (GET_CODE (operands[3]))
24415 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
24416 operands[5], operands[0], operands[0]);
24417 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
24418 operands[5], operands[1], operands[2]);
24422 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
24423 operands[5], operands[0], operands[0]);
24424 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
24425 operands[5], operands[1], operands[2]);
24429 gcc_unreachable ();
24431 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
24433 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24437 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
24438 operands[5], operands[1], operands[2]))
24441 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
24442 operands[1], operands[2]);
24443 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24447 /* Expand a signed/unsigned integral vector conditional move. */
24450 ix86_expand_int_vcond (rtx operands[])
24452 machine_mode data_mode = GET_MODE (operands[0]);
24453 machine_mode mode = GET_MODE (operands[4]);
24454 enum rtx_code code = GET_CODE (operands[3]);
24455 bool negate = false;
24458 cop0 = operands[4];
24459 cop1 = operands[5];
24461 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
24462 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
24463 if ((code == LT || code == GE)
24464 && data_mode == mode
24465 && cop1 == CONST0_RTX (mode)
24466 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
24467 && GET_MODE_UNIT_SIZE (data_mode) > 1
24468 && GET_MODE_UNIT_SIZE (data_mode) <= 8
24469 && (GET_MODE_SIZE (data_mode) == 16
24470 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
24472 rtx negop = operands[2 - (code == LT)];
24473 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
24474 if (negop == CONST1_RTX (data_mode))
24476 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
24477 operands[0], 1, OPTAB_DIRECT);
24478 if (res != operands[0])
24479 emit_move_insn (operands[0], res);
24482 else if (GET_MODE_INNER (data_mode) != DImode
24483 && vector_all_ones_operand (negop, data_mode))
24485 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
24486 operands[0], 0, OPTAB_DIRECT);
24487 if (res != operands[0])
24488 emit_move_insn (operands[0], res);
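/* E.g. on V4SImode, "x < 0 ? -1 : 0" becomes "psrad $31, x" and
   "x < 0 ? 1 : 0" becomes "psrld $31, x".  */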
24493 if (!nonimmediate_operand (cop1, mode))
24494 cop1 = force_reg (mode, cop1);
24495 if (!general_operand (operands[1], data_mode))
24496 operands[1] = force_reg (data_mode, operands[1]);
24497 if (!general_operand (operands[2], data_mode))
24498 operands[2] = force_reg (data_mode, operands[2]);
24500 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
24501 operands[1], operands[2], &negate);
24506 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
24507 operands[2-negate]);
24511 /* AVX512F does support 64-byte integer vector operations,
24512 thus the longest vector we are faced with is V64QImode. */
24513 #define MAX_VECT_LEN 64
24515 struct expand_vec_perm_d
24517 rtx target, op0, op1;
24518 unsigned char perm[MAX_VECT_LEN];
24519 machine_mode vmode;
24520 unsigned char nelt;
24521 bool one_operand_p;
24526 ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1,
24527 struct expand_vec_perm_d *d)
24529 /* ix86_expand_vec_perm_vpermt2 is called from both the const and non-const
24530 expanders, so the args are either in d, or in op0, op1, etc. */
24531 machine_mode mode = GET_MODE (d ? d->op0 : op0);
24532 machine_mode maskmode = mode;
24533 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
24538 if (TARGET_AVX512VL && TARGET_AVX512BW)
24539 gen = gen_avx512vl_vpermt2varv8hi3;
24542 if (TARGET_AVX512VL && TARGET_AVX512BW)
24543 gen = gen_avx512vl_vpermt2varv16hi3;
24546 if (TARGET_AVX512VBMI)
24547 gen = gen_avx512bw_vpermt2varv64qi3;
24550 if (TARGET_AVX512BW)
24551 gen = gen_avx512bw_vpermt2varv32hi3;
24554 if (TARGET_AVX512VL)
24555 gen = gen_avx512vl_vpermt2varv4si3;
24558 if (TARGET_AVX512VL)
24559 gen = gen_avx512vl_vpermt2varv8si3;
24562 if (TARGET_AVX512F)
24563 gen = gen_avx512f_vpermt2varv16si3;
24566 if (TARGET_AVX512VL)
24568 gen = gen_avx512vl_vpermt2varv4sf3;
24569 maskmode = V4SImode;
24573 if (TARGET_AVX512VL)
24575 gen = gen_avx512vl_vpermt2varv8sf3;
24576 maskmode = V8SImode;
24580 if (TARGET_AVX512F)
24582 gen = gen_avx512f_vpermt2varv16sf3;
24583 maskmode = V16SImode;
24587 if (TARGET_AVX512VL)
24588 gen = gen_avx512vl_vpermt2varv2di3;
24591 if (TARGET_AVX512VL)
24592 gen = gen_avx512vl_vpermt2varv4di3;
24595 if (TARGET_AVX512F)
24596 gen = gen_avx512f_vpermt2varv8di3;
24599 if (TARGET_AVX512VL)
24601 gen = gen_avx512vl_vpermt2varv2df3;
24602 maskmode = V2DImode;
24606 if (TARGET_AVX512VL)
24608 gen = gen_avx512vl_vpermt2varv4df3;
24609 maskmode = V4DImode;
24613 if (TARGET_AVX512F)
24615 gen = gen_avx512f_vpermt2varv8df3;
24616 maskmode = V8DImode;
24626 /* ix86_expand_vec_perm_vpermt2 is called from both the const and non-const
24627 expanders, so the args are either in d, or in op0, op1, etc. */
24631 target = d->target;
24634 for (int i = 0; i < d->nelt; ++i)
24635 vec[i] = GEN_INT (d->perm[i]);
24636 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
24639 emit_insn (gen (target, force_reg (maskmode, mask), op0, op1));
24643 /* Expand a variable vector permutation. */
24646 ix86_expand_vec_perm (rtx operands[])
24648 rtx target = operands[0];
24649 rtx op0 = operands[1];
24650 rtx op1 = operands[2];
24651 rtx mask = operands[3];
24652 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
24653 machine_mode mode = GET_MODE (op0);
24654 machine_mode maskmode = GET_MODE (mask);
24656 bool one_operand_shuffle = rtx_equal_p (op0, op1);
24658 /* Number of elements in the vector. */
24659 w = GET_MODE_NUNITS (mode);
24660 e = GET_MODE_UNIT_SIZE (mode);
24661 gcc_assert (w <= 64);
24663 if (TARGET_AVX512F && one_operand_shuffle)
24665 rtx (*gen) (rtx, rtx, rtx) = NULL;
24669 gen = gen_avx512f_permvarv16si;
24672 gen = gen_avx512f_permvarv16sf;
24675 gen = gen_avx512f_permvarv8di;
24678 gen = gen_avx512f_permvarv8df;
24685 emit_insn (gen (target, op0, mask));
24690 if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL))
24695 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
24697 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
24698 a constant shuffle operand. With a tiny bit of effort we can
24699 use VPERMD instead. A re-interpretation stall for V4DFmode is
24700 unfortunate but there's no avoiding it.
24701 Similarly for V16HImode we don't have instructions for variable
24702 shuffling, while for V32QImode, after preparing suitable masks,
24703 we can use vpshufb; vpshufb; vpermq; vpor. */
24705 if (mode == V16HImode)
24707 maskmode = mode = V32QImode;
24713 maskmode = mode = V8SImode;
24717 t1 = gen_reg_rtx (maskmode);
24719 /* Replicate the low bits of the V4DImode mask into V8SImode:
24720 mask = { A B C D },
24721 t1 = { A A B B C C D D }. */
24722 for (i = 0; i < w / 2; ++i)
24723 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
24724 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24725 vt = force_reg (maskmode, vt);
24726 mask = gen_lowpart (maskmode, mask);
24727 if (maskmode == V8SImode)
24728 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
24730 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
24732 /* Multiply the shuffle indices by two. */
24733 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
24736 /* Add one to the odd shuffle indices:
24737 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
24738 for (i = 0; i < w / 2; ++i)
24740 vec[i * 2] = const0_rtx;
24741 vec[i * 2 + 1] = const1_rtx;
24743 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24744 vt = validize_mem (force_const_mem (maskmode, vt));
24745 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
24748 /* Continue as if V8SImode (resp. V32QImode) had been used initially. */
24749 operands[3] = mask = t1;
24750 target = gen_reg_rtx (mode);
24751 op0 = gen_lowpart (mode, op0);
24752 op1 = gen_lowpart (mode, op1);
24758 /* The VPERMD and VPERMPS instructions already properly ignore
24759 the high bits of the shuffle elements. No need for us to
24760 perform an AND ourselves. */
24761 if (one_operand_shuffle)
24763 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
24764 if (target != operands[0])
24765 emit_move_insn (operands[0],
24766 gen_lowpart (GET_MODE (operands[0]), target));
24770 t1 = gen_reg_rtx (V8SImode);
24771 t2 = gen_reg_rtx (V8SImode);
24772 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
24773 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
24779 mask = gen_lowpart (V8SImode, mask);
24780 if (one_operand_shuffle)
24781 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
24784 t1 = gen_reg_rtx (V8SFmode);
24785 t2 = gen_reg_rtx (V8SFmode);
24786 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
24787 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
24793 /* By combining the two 128-bit input vectors into one 256-bit
24794 input vector, we can use VPERMD and VPERMPS for the full
24795 two-operand shuffle. */
24796 t1 = gen_reg_rtx (V8SImode);
24797 t2 = gen_reg_rtx (V8SImode);
24798 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
24799 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
24800 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
24801 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
24805 t1 = gen_reg_rtx (V8SFmode);
24806 t2 = gen_reg_rtx (V8SImode);
24807 mask = gen_lowpart (V4SImode, mask);
24808 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
24809 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
24810 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
24811 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
24815 t1 = gen_reg_rtx (V32QImode);
24816 t2 = gen_reg_rtx (V32QImode);
24817 t3 = gen_reg_rtx (V32QImode);
24818 vt2 = GEN_INT (-128);
24819 vt = gen_const_vec_duplicate (V32QImode, vt2);
24820 vt = force_reg (V32QImode, vt);
24821 for (i = 0; i < 32; i++)
24822 vec[i] = i < 16 ? vt2 : const0_rtx;
24823 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
24824 vt2 = force_reg (V32QImode, vt2);
24825 /* From mask create two adjusted masks, which contain the same
24826 bits as mask in the low 7 bits of each vector element.
24827 The first mask will have the most significant bit clear
24828 if it requests element from the same 128-bit lane
24829 and MSB set if it requests element from the other 128-bit lane.
24830 The second mask will have the opposite values of the MSB,
24831 and additionally will have its 128-bit lanes swapped.
24832 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
24833 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
24834 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
24835 stands for other 12 bytes. */
24836 /* The bit that tells whether an element is from the same lane or the
24837 other lane is bit 4, so shift it up by 3 to the MSB position. */
24838 t5 = gen_reg_rtx (V4DImode);
24839 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
24841 /* Clear MSB bits from the mask just in case it had them set. */
24842 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
24843 /* After this, t1 will have the MSB set for elements from the other lane. */
24844 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
24845 /* Clear bits other than MSB. */
24846 emit_insn (gen_andv32qi3 (t1, t1, vt));
24847 /* OR in the lower bits from the mask into t3. */
24848 emit_insn (gen_iorv32qi3 (t3, t1, t2));
24849 /* And invert the MSB bits in t1, so the MSB is set for elements from the same lane. */
24851 emit_insn (gen_xorv32qi3 (t1, t1, vt));
24852 /* Swap 128-bit lanes in t3. */
24853 t6 = gen_reg_rtx (V4DImode);
24854 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
24855 const2_rtx, GEN_INT (3),
24856 const0_rtx, const1_rtx));
24857 /* And OR in the lower bits from the mask into t1. */
24858 emit_insn (gen_iorv32qi3 (t1, t1, t2));
24859 if (one_operand_shuffle)
24861 /* Each of these shuffles will put 0s in places where
24862 element from the other 128-bit lane is needed, otherwise
24863 will shuffle in the requested value. */
24864 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
24865 gen_lowpart (V32QImode, t6)));
24866 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
24867 /* For t3 the 128-bit lanes are swapped again. */
24868 t7 = gen_reg_rtx (V4DImode);
24869 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
24870 const2_rtx, GEN_INT (3),
24871 const0_rtx, const1_rtx));
24872 /* And ORing both together yields the result. */
24873 emit_insn (gen_iorv32qi3 (target, t1,
24874 gen_lowpart (V32QImode, t7)));
24875 if (target != operands[0])
24876 emit_move_insn (operands[0],
24877 gen_lowpart (GET_MODE (operands[0]), target));
24881 t4 = gen_reg_rtx (V32QImode);
24882 /* Similar to the one_operand_shuffle code above, just repeated
24883 twice, once for each operand. The merge_two: code below will
24884 merge the two results together. */
24885 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
24886 gen_lowpart (V32QImode, t6)));
24887 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
24888 gen_lowpart (V32QImode, t6)));
24889 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
24890 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
24891 t7 = gen_reg_rtx (V4DImode);
24892 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
24893 const2_rtx, GEN_INT (3),
24894 const0_rtx, const1_rtx));
24895 t8 = gen_reg_rtx (V4DImode);
24896 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
24897 const2_rtx, GEN_INT (3),
24898 const0_rtx, const1_rtx));
24899 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
24900 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
  gcc_assert (GET_MODE_SIZE (mode) <= 16);

  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
	 one_operand_shuffle special case, we avoid creating another
	 set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  vt = gen_const_vec_duplicate (maskmode, vt);
  mask = expand_simple_binop (maskmode, AND, mask, vt,
			      NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
				  GEN_INT (exact_log2 (e)),
				  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
	 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
	 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
	 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = validize_mem (force_const_mem (V16QImode, vt));
      if (TARGET_XOP)
	emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
	emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
	 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = validize_mem (force_const_mem (V16QImode, vt));
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }

  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);

  if (TARGET_XOP)
    {
      if (GET_MODE (target) != V16QImode)
	target = gen_reg_rtx (V16QImode);
      emit_insn (gen_xop_pperm (target, op0, op1, mask));
      if (target != operands[0])
	emit_move_insn (operands[0],
			gen_lowpart (GET_MODE (operands[0]), target));
    }
  else if (one_operand_shuffle)
    {
      if (GET_MODE (target) != V16QImode)
	target = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
      if (target != operands[0])
	emit_move_insn (operands[0],
			gen_lowpart (GET_MODE (operands[0]), target));
    }
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
	 element contained a bit set that indicates the second word.  */
      mask = operands[3];
      vt = GEN_INT (w);
      if (maskmode == V2DImode && !TARGET_SSE4_1)
	{
	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
	     more shuffle to convert the V2DI input mask into a V4SI
	     input mask.  At that point the masking that expand_int_vcond
	     does will work as desired.  */
	  rtx t3 = gen_reg_rtx (V4SImode);
	  emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
					const0_rtx, const0_rtx,
					const2_rtx, const2_rtx));
	  mask = t3;
	  maskmode = V4SImode;
	  e = w = 4;
	}

      vt = gen_const_vec_duplicate (maskmode, vt);
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
				  NULL_RTX, 0, OPTAB_DIRECT);

      if (GET_MODE (target) != mode)
	target = gen_reg_rtx (mode);
      xops[0] = target;
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
      if (target != operands[0])
	emit_move_insn (operands[0],
			gen_lowpart (GET_MODE (operands[0]), target));
    }
}
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
{
  machine_mode imode = GET_MODE (src);
  rtx tmp;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      machine_mode halfmode = BLKmode;

      switch (imode)
	{
	case E_V64QImode:
	  if (unsigned_p)
	    unpack = gen_avx512bw_zero_extendv32qiv32hi2;
	  else
	    unpack = gen_avx512bw_sign_extendv32qiv32hi2;
	  halfmode = V32QImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
	  break;
	case E_V32QImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv16qiv16hi2;
	  else
	    unpack = gen_avx2_sign_extendv16qiv16hi2;
	  halfmode = V16QImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
	  break;
	case E_V32HImode:
	  if (unsigned_p)
	    unpack = gen_avx512f_zero_extendv16hiv16si2;
	  else
	    unpack = gen_avx512f_sign_extendv16hiv16si2;
	  halfmode = V16HImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
	  break;
	case E_V16HImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv8hiv8si2;
	  else
	    unpack = gen_avx2_sign_extendv8hiv8si2;
	  halfmode = V8HImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
	  break;
	case E_V16SImode:
	  if (unsigned_p)
	    unpack = gen_avx512f_zero_extendv8siv8di2;
	  else
	    unpack = gen_avx512f_sign_extendv8siv8di2;
	  halfmode = V8SImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
	  break;
	case E_V8SImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv4siv4di2;
	  else
	    unpack = gen_avx2_sign_extendv4siv4di2;
	  halfmode = V4SImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
	  break;
	case E_V16QImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
	  else
	    unpack = gen_sse4_1_sign_extendv8qiv8hi2;
	  break;
	case E_V8HImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv4hiv4si2;
	  else
	    unpack = gen_sse4_1_sign_extendv4hiv4si2;
	  break;
	case E_V4SImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv2siv2di2;
	  else
	    unpack = gen_sse4_1_sign_extendv2siv2di2;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (GET_MODE_SIZE (imode) >= 32)
	{
	  tmp = gen_reg_rtx (halfmode);
	  emit_insn (extract (tmp, src));
	}
      else if (high_p)
	{
	  /* Shift higher 8 bytes to lower 8 bytes.  */
	  tmp = gen_reg_rtx (V1TImode);
	  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
					 GEN_INT (64)));
	  tmp = gen_lowpart (imode, tmp);
	}
      else
	tmp = src;

      emit_insn (unpack (dest, tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
	{
	case E_V16QImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv16qi;
	  else
	    unpack = gen_vec_interleave_lowv16qi;
	  break;
	case E_V8HImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv8hi;
	  else
	    unpack = gen_vec_interleave_lowv8hi;
	  break;
	case E_V4SImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv4si;
	  else
	    unpack = gen_vec_interleave_lowv4si;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (unsigned_p)
	tmp = force_reg (imode, CONST0_RTX (imode));
      else
	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
				   src, pc_rtx, pc_rtx);

      rtx tmp2 = gen_reg_rtx (imode);
      emit_insn (unpack (tmp2, src, tmp));
      emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
    }
}
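
/* A worked example of the non-SSE4.1 path above (a sketch, not part of
   the interface): for an unsigned, low-half unpack of V16QImode, TMP is
   a zero vector and the interleave

       tmp2 = punpcklbw (src, tmp)

   places a zero byte after each of the low eight source bytes, which is
   exactly the zero extension to V8HImode that the final lowpart move
   hands to DEST.  In the signed case TMP instead holds the sign masks
   computed by ix86_expand_sse_cmp, so the interleave sign-extends.  */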
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
	{
	case E_QImode:
	  insn = gen_subqi3_carry;
	  break;
	case E_HImode:
	  insn = gen_subhi3_carry;
	  break;
	case E_SImode:
	  insn = gen_subsi3_carry;
	  break;
	case E_DImode:
	  insn = gen_subdi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode:
	  insn = gen_addqi3_carry;
	  break;
	case E_HImode:
	  insn = gen_addhi3_carry;
	  break;
	case E_SImode:
	  insn = gen_addsi3_carry;
	  break;
	case E_DImode:
	  insn = gen_adddi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
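
/* For illustration (approximate emitted assembly, assuming SImode
   operands and an increment by 1): "r = r + (a < b)" becomes

       cmpl  %ebx, %eax        ; carry flag = (a < b), unsigned
       adcl  $0, %ecx          ; r = r + 0 + CF

   and the decrement case picks the sub*_carry patterns instead,
   giving "sbbl $0, %ecx".  */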
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    operand = avoid_constant_pool_reference (operand);

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      scalar_int_mode imode = int_mode_for_mode (mode).require ();
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
	{
	  int i;

	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      for (i = 0; i < size; i++)
		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      for (i = 1; i < size; i++)
		parts[i] = adjust_address (operand, SImode, 4 * i);
	    }
	  else if (CONST_DOUBLE_P (operand))
	    {
	      const REAL_VALUE_TYPE *r;
	      long l[4];

	      r = CONST_DOUBLE_REAL_VALUE (operand);
	      switch (mode)
		{
		case E_TFmode:
		  real_to_target (l, r, mode);
		  parts[3] = gen_int_mode (l[3], SImode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case E_XFmode:
		  /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
		     long double may not be 80-bit.  */
		  real_to_target (l, r, mode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case E_DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (CONST_DOUBLE_P (operand))
	    {
	      long l[4];

	      real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);

	      /* real_to_target puts 32-bit pieces in each long.  */
	      parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff))
				       | ((l[1] & HOST_WIDE_INT_C (0xffffffff))
					  << 32), DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else
		parts[1]
		  = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff))
				  | ((l[3] & HOST_WIDE_INT_C (0xffffffff))
				     << 32), DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], word_mode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
	src_base = plus_constant (Pmode, src_base, 4);

      /* src_base refers to the stack pointer and is
	 automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
	part[1][i] = change_address (part[1][i],
				     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
	{
	  collisionparts[i]
	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
	  if (collisionparts[i])
	    collisions++;
	}

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts [1])
	{
	  std::swap (part[0][1], part[0][2]);
	  std::swap (part[1][1], part[1][2]);
	}
      else if (collisions == 1
	       && nparts == 4
	       && (collisionparts [1] || collisionparts [2]))
	{
	  if (collisionparts [1])
	    {
	      std::swap (part[0][1], part[0][2]);
	      std::swap (part[1][1], part[1][2]);
	    }
	  else
	    {
	      std::swap (part[0][2], part[0][3]);
	      std::swap (part[1][2], part[1][3]);
	    }
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base, addr;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  addr = XEXP (part[1][0], 0);
	  if (TARGET_TLS_DIRECT_SEG_REFS)
	    {
	      struct ix86_address parts;
	      int ok = ix86_decompose_address (addr, &parts);
	      gcc_assert (ok);
	      /* It is not valid to use %gs: or %fs: in lea.  */
	      gcc_assert (parts.seg == ADDR_SPACE_GENERIC);
	    }
	  emit_insn (gen_rtx_SET (base, addr));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  for (i = 1; i < nparts; i++)
	    {
	      tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
	      part[1][i] = replace_equiv_address (part[1][i], tmp);
	    }
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (ix86_gen_add3 (stack_pointer_rtx,
					  stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	  else if (nparts == 4)
	    {
	      emit_move_insn (part[0][3], part[1][3]);
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this
	     is a register, it is OK - we will just use the larger
	     counterpart.  We also retype memory - these come from an attempt
	     to avoid a REX prefix on moving the second half of a TFmode
	     value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))
	   || (nparts == 4
	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (MEM_P (part[1][0])
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
	{
	  operands[2 + i] = part[0][j];
	  operands[6 + i] = part[1][j];
	}
    }
  else
    {
      for (i = 0; i < nparts; i++)
	{
	  operands[2 + i] = part[0][i];
	  operands[6 + i] = part[1][i];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
	if (CONST_INT_P (operands[6 + j])
	    && operands[6 + j] != const0_rtx
	    && REG_P (operands[2 + j]))
	  for (i = j; i < nparts - 1; i++)
	    if (CONST_INT_P (operands[7 + i])
		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
	      operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);
}
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
	  && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
	emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
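
/* Example: on a core where two adds cost less than a constant shift,
   "x <<= 2" comes out of the loop above as

       addl  %eax, %eax
       addl  %eax, %eax

   while a larger count (or -Os) falls back to a single "sall $N".  */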
void
ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > half_width)
	    ix86_expand_ashl_const (high[0], count - half_width, mode);
	}
      else
	{
	  gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  machine_mode half_mode;
	  rtx (*gen_lshr3)(rtx, rtx, rtx);
	  rtx (*gen_and3)(rtx, rtx, rtx);
	  rtx (*gen_xor3)(rtx, rtx, rtx);
	  HOST_WIDE_INT bits;
	  rtx x;

	  if (mode == DImode)
	    {
	      half_mode = SImode;
	      gen_lshr3 = gen_lshrsi3;
	      gen_and3 = gen_andsi3;
	      gen_xor3 = gen_xorsi3;
	      bits = 5;
	    }
	  else
	    {
	      half_mode = DImode;
	      gen_lshr3 = gen_lshrdi3;
	      gen_and3 = gen_anddi3;
	      gen_xor3 = gen_xordi3;
	      bits = 6;
	    }

	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
	    x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
	  else
	    x = gen_lowpart (half_mode, operands[2]);
	  emit_insn (gen_rtx_SET (high[0], x));

	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
	  emit_insn (gen_and3 (high[0], high[0], const1_rtx));
	  emit_move_insn (low[0], high[0]);
	  emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
	}

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
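
/* For illustration: a DImode "x << n" split on a 32-bit target with a
   known constant n < 32 yields

       shldl $n, %eax, %edx    ; high = (high << n) | (low >> (32 - n))
       sall  $n, %eax          ; low <<= n

   and a constant n >= 32 instead moves LOW into HIGH, clears LOW and
   finishes HIGH with ix86_expand_ashl_const.  */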
void
ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
	{
	  emit_move_insn (high[0], high[1]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));
	  emit_move_insn (low[0], high[0]);
	}
      else if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));

	  if (count > half_width)
	    emit_insn (gen_ashr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashr3 (scratch, scratch,
				GEN_INT (half_width - 1)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

	  emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
	}
    }
}
void
ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > half_width)
	    emit_insn (gen_lshr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  ix86_expand_clear (scratch);
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

	  emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
	}
    }
}
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx_insn *insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob));
}
/* Helper function for the string operations below.  Tests VARIABLE for
   whether it is aligned to VALUE bytes.  If so, jump to the label.  */
static rtx_code_label *
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx_code_label *label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
}
/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}
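
/* Example: scale_counter (GEN_INT (17), 4) returns GEN_INT (4), and a
   register count is shifted right by exact_log2 (4) == 2, so the rep
   expansions below always receive a chunk count rather than bytes.  */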
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* Copy the address to a Pmode register.  This is used for x32 to
   truncate DImode TLS address to a SImode register.  */

static rtx
ix86_copy_addr_to_reg (rtx addr)
{
  rtx reg;
  if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
    {
      reg = copy_addr_to_reg (addr);
      REG_POINTER (reg) = 1;
      return reg;
    }
  else
    {
      gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
      reg = copy_to_mode_reg (DImode, addr);
      REG_POINTER (reg) = 1;
      return gen_rtx_SUBREG (SImode, reg, 0);
    }
}
/* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
   SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is
   COUNT specified in bytes.  When ISSETMEM is TRUE, output the equivalent loop
   to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to whole number of chunk size moved at once.
   SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, machine_mode mode, int unroll,
			       int expected_size, bool issetmem)
{
  rtx_code_label *out_label, *top_label;
  rtx iter, tmp;
  machine_mode iter_mode = counter_mode (count);
  int piece_size_n = GET_MODE_SIZE (mode) * unroll;
  rtx piece_size = GEN_INT (piece_size_n);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);

  /* This assert could be relaxed - in this case we'll need to compute
     the smallest power of two containing PIECE_SIZE_N and pass it to
     offset_address.  */
  gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
  destmem = offset_address (destmem, tmp, piece_size_n);
  destmem = adjust_address (destmem, mode, 0);

  if (!issetmem)
    {
      srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
      srcmem = adjust_address (srcmem, mode, 0);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using single temporary.
	 Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
	{
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem = adjust_address (copy_rtx (destmem), mode,
					    GET_MODE_SIZE (mode));
		  srcmem = adjust_address (copy_rtx (srcmem), mode,
					   GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		srcmem = adjust_address (copy_rtx (srcmem), mode,
					 GET_MODE_SIZE (mode));
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		destmem = adjust_address (copy_rtx (destmem), mode,
					  GET_MODE_SIZE (mode));
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem = adjust_address (copy_rtx (destmem), mode,
				    GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2)
		      / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (!issetmem)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
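
/* The emitted control flow, sketched as pseudocode (the real insns are
   produced by the expanders above):

       size = count & ~(piece_size - 1);
       iter = 0;
     top:
       copy or set piece_size bytes at dest + iter (and src + iter);
       iter += piece_size;
       if (iter < size) goto top;
       dest += iter;  src += iter;   -- pointers updated for the caller
     out:
  */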
/* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
   When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
   When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
   For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
   ORIG_VALUE is the original value passed to memset to fill the memory with.
   Other arguments have same meaning as for previous function.  */

static void
expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
			      rtx destptr, rtx srcptr, rtx value, rtx orig_value,
			      rtx count,
			      machine_mode mode, bool issetmem)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If possible, it is shorter to use rep movs.
     TODO: Maybe it is better to move this logic to decide_alg.  */
  if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
      && (!issetmem || orig_value == const0_rtx))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);

  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
    {
      rounded_count
	= ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);

  if (issetmem)
    {
      value = force_reg (mode, gen_lowpart (mode, value));
      emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
    }
  else
    {
      if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
	srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
      if (mode != QImode)
	{
	  srcexp = gen_rtx_ASHIFT (Pmode, countreg,
				   GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
	  srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
	}
      else
	srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
      if (CONST_INT_P (count))
	{
	  rounded_count
	    = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
	  srcmem = shallow_copy_rtx (srcmem);
	  set_mem_size (srcmem, rounded_count);
	}
      else
	{
	  if (MEM_SIZE_KNOWN_P (srcmem))
	    clear_mem_size (srcmem);
	}
      emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
			      destexp, srcexp));
    }
}
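
/* For illustration: a 32-byte memcpy expanded here with MODE == SImode
   produces roughly

       movl  $8, %ecx          ; count scaled down by GET_MODE_SIZE
       rep movsl               ; copy 8 dwords from %esi to %edi

   where DESTEXP/SRCEXP above merely describe the final pointer values,
   destptr + (countreg << 2), for the RTL.  */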
/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
   DESTMEM.
   SRC is passed by pointer to be updated on return.
   Return value is updated DST.  */
static rtx
emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
	     HOST_WIDE_INT size_to_move)
{
  rtx dst = destmem, src = *srcmem, adjust, tempreg;
  enum insn_code code;
  machine_mode move_mode;
  int piece_size, i;

  /* Find the widest mode in which we could perform moves.
     Start with the biggest power of 2 less than SIZE_TO_MOVE and half
     it until move of such size is supported.  */
  piece_size = 1 << floor_log2 (size_to_move);
  while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode)
	 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
    {
      gcc_assert (piece_size > 1);
      piece_size >>= 1;
    }

  /* Find the corresponding vector mode with the same size as MOVE_MODE.
     MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
  if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
    {
      int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
      if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
	  || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
	{
	  move_mode = word_mode;
	  piece_size = GET_MODE_SIZE (move_mode);
	  code = optab_handler (mov_optab, move_mode);
	}
    }
  gcc_assert (code != CODE_FOR_nothing);

  dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
  src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);

  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZES moves.  */
  gcc_assert (size_to_move % piece_size == 0);
  adjust = GEN_INT (piece_size);
  for (i = 0; i < size_to_move; i += piece_size)
    {
      /* We move from memory to memory, so we'll need to do it via
	 a temporary register.  */
      tempreg = gen_reg_rtx (move_mode);
      emit_insn (GEN_FCN (code) (tempreg, src));
      emit_insn (GEN_FCN (code) (dst, tempreg));

      emit_move_insn (destptr,
		      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
      emit_move_insn (srcptr,
		      gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));

      dst = adjust_automodify_address_nv (dst, move_mode, destptr,
					  piece_size);
      src = adjust_automodify_address_nv (src, move_mode, srcptr,
					  piece_size);
    }

  /* Update DST and SRC rtx.  */
  *srcmem = src;
  return dst;
}
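
/* Example (a sketch; the exact mode depends on enabled ISAs): for
   SIZE_TO_MOVE == 16 without vector moves the loop above settles on
   word_mode pieces and emits two load/store pairs through a temporary,
   advancing DESTPTR and SRCPTR by 8 after each pair; with SSE enabled a
   single 16-byte vector move is used instead.  */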
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      HOST_WIDE_INT epilogue_size = countval % max_size;
      int i;

      /* For now MAX_SIZE should be a power of 2.  This assert could be
	 relaxed, but it'll require a bit more complicated epilogue
	 expanding.  */
      gcc_assert ((max_size & (max_size - 1)) == 0);
      for (i = max_size; i >= 1; i >>= 1)
	{
	  if (epilogue_size & i)
	    destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
	}
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
				   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
				     count, QImode, 1, 4, false);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
	{
	  rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
	  src = change_address (srcmem, HImode, srcptr);
	  dest = change_address (destmem, HImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
	  src = change_address (srcmem, QImode, srcptr);
	  dest = change_address (destmem, QImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
	{
	  rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, HImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, HImode, tmp);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, QImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, QImode, tmp);
	  emit_move_insn (dest, src);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
}
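
/* Example: for a constant COUNT with COUNT % MAX_SIZE == 7, the loop
   above emits straight-line moves of 4, 2 and 1 bytes (the set bits of
   7) via emit_memmov; no branches are needed in the constant case.  */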
/* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
   with value PROMOTED_VAL.
   Return value is updated DST.  */
static rtx
emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
	     HOST_WIDE_INT size_to_move)
{
  rtx dst = destmem, adjust;
  enum insn_code code;
  machine_mode move_mode;
  int piece_size, i;

  /* Find the widest mode in which we could perform moves.
     Start with the biggest power of 2 less than SIZE_TO_MOVE and half
     it until move of such size is supported.  */
  move_mode = GET_MODE (promoted_val);
  if (move_mode == VOIDmode)
    move_mode = QImode;
  if (size_to_move < GET_MODE_SIZE (move_mode))
    {
      unsigned int move_bits = size_to_move * BITS_PER_UNIT;
      move_mode = int_mode_for_size (move_bits, 0).require ();
      promoted_val = gen_lowpart (move_mode, promoted_val);
    }
  piece_size = GET_MODE_SIZE (move_mode);
  code = optab_handler (mov_optab, move_mode);
  gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);

  dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);

  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZES moves.  */
  gcc_assert (size_to_move % piece_size == 0);
  adjust = GEN_INT (piece_size);
  for (i = 0; i < size_to_move; i += piece_size)
    {
      if (piece_size <= GET_MODE_SIZE (word_mode))
	{
	  emit_insn (gen_strset (destptr, dst, promoted_val));
	  dst = adjust_automodify_address_nv (dst, move_mode, destptr,
					      piece_size);
	  continue;
	}

      emit_insn (GEN_FCN (code) (dst, promoted_val));

      emit_move_insn (destptr,
		      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));

      dst = adjust_automodify_address_nv (dst, move_mode, destptr,
					  piece_size);
    }

  /* Update DST rtx.  */
  return dst;
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
				 rtx count, int max_size)
{
  count = expand_simple_binop (counter_mode (count), AND, count,
			       GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
				 gen_lowpart (QImode, value), count, QImode,
				 1, max_size / 2, true);
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
			rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      HOST_WIDE_INT epilogue_size = countval % max_size;
      int i;

      /* For now MAX_SIZE should be a power of 2.  This assert could be
	 relaxed, but it'll require a bit more complicated epilogue
	 expanding.  */
      gcc_assert ((max_size & (max_size - 1)) == 0);
      for (i = max_size; i >= 1; i >>= 1)
	{
	  if (epilogue_size & i)
	    {
	      if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
		destmem = emit_memset (destmem, destptr, vec_value, i);
	      else
		destmem = emit_memset (destmem, destptr, value, i);
	    }
	}
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
	  emit_insn (gen_strset (destptr, dest, value));
	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
	  emit_insn (gen_strset (destptr, dest, value));
	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
   DESTMEM to align it to DESIRED_ALIGNMENT.  Original alignment is ALIGN.
   Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
   ignored.
   Return value is updated DESTMEM.  */
static rtx
expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx vec_value, rtx count, int align,
			       int desired_alignment, bool issetmem)
{
  int i;
  for (i = 1; i < desired_alignment; i <<= 1)
    {
      if (align <= i)
	{
	  rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
	  if (issetmem)
	    {
	      if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
		destmem = emit_memset (destmem, destptr, vec_value, i);
	      else
		destmem = emit_memset (destmem, destptr, value, i);
	    }
	  else
	    destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
	  ix86_adjust_counter (count, i);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
	}
    }
  return destmem;
}
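
/* Example: with ALIGN == 1 and DESIRED_ALIGNMENT == 8 the loop above
   emits three guarded blocks testing bits 1, 2 and 4 of DESTPTR; each
   block copies (or sets) that many bytes and is skipped when the bit is
   clear, so control falls through with an 8-byte aligned pointer and a
   correspondingly adjusted COUNT.  */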
/* Test if COUNT&SIZE is nonzero and if so, expand a movmem
   or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
   and jump to DONE_LABEL.  */
static void
expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr,
			       rtx value, rtx vec_value,
			       rtx count, int size,
			       rtx done_label, bool issetmem)
{
  rtx_code_label *label = ix86_expand_aligntest (count, size, false);
  machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk ();
  rtx modesize;
  int n;

  /* If we do not have vector value to copy, we must reduce size.  */
  if (issetmem)
    {
      if (!vec_value)
	{
	  if (GET_MODE (value) == VOIDmode && size > 8)
	    mode = Pmode;
	  else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
	    mode = GET_MODE (value);
	}
      else
	mode = GET_MODE (vec_value), value = vec_value;
    }
  else
    {
      /* Choose appropriate vector mode.  */
      if (size >= 32)
	mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
      else if (size >= 16)
	mode = TARGET_SSE ? V16QImode : DImode;
      srcmem = change_address (srcmem, mode, srcptr);
    }
  destmem = change_address (destmem, mode, destptr);
  modesize = GEN_INT (GET_MODE_SIZE (mode));
  gcc_assert (GET_MODE_SIZE (mode) <= size);
  for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
    {
      if (issetmem)
	emit_move_insn (destmem, gen_lowpart (mode, value));
      else
	{
	  emit_move_insn (destmem, srcmem);
	  srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
	}
      destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
    }

  destmem = offset_address (destmem, count, 1);
  destmem = offset_address (destmem, GEN_INT (-2 * size),
			    GET_MODE_SIZE (mode));
  if (!issetmem)
    {
      srcmem = offset_address (srcmem, count, 1);
      srcmem = offset_address (srcmem, GEN_INT (-2 * size),
			       GET_MODE_SIZE (mode));
    }
  for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
    {
      if (issetmem)
	emit_move_insn (destmem, gen_lowpart (mode, value));
      else
	{
	  emit_move_insn (destmem, srcmem);
	  srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
	}
      destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
    }
  emit_jump_insn (gen_jump (done_label));
  emit_barrier ();

  emit_label (label);
  LABEL_NUSES (label) = 1;
}
26763 /* Handle small memcpy (up to SIZE that is supposed to be small power of 2.
26764 and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN
26765 bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can
26766 proceed with an loop copying SIZE bytes at once. Do moves in MODE.
26767 DONE_LABEL is a label after the whole copying sequence. The label is created
26768 on demand if *DONE_LABEL is NULL.
26769 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
26770 bounds after the initial copies.
26772 DESTMEM/SRCMEM are memory expressions pointing to the copies block,
26773 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
26774 we will dispatch to a library call for large blocks.
26776 In pseudocode we do:
26780 Assume that SIZE is 4. Bigger sizes are handled analogously
26783 copy 4 bytes from SRCPTR to DESTPTR
26784 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
26789 copy 1 byte from SRCPTR to DESTPTR
26792 copy 2 bytes from SRCPTR to DESTPTR
26793 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
26798 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
26799 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
26801 OLD_DESPTR = DESTPTR;
26802 Align DESTPTR up to DESIRED_ALIGN
26803 SRCPTR += DESTPTR - OLD_DESTPTR
26804 COUNT -= DEST_PTR - OLD_DESTPTR
26806 Round COUNT down to multiple of SIZE
26807 << optional caller supplied zero size guard is here >>
26808 << optional caller supplied dynamic check is here >>
26809 << caller supplied main copy loop is here >>
26814 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
26815 rtx *destptr, rtx *srcptr,
26817 rtx value, rtx vec_value,
26819 rtx_code_label **done_label,
26823 unsigned HOST_WIDE_INT *min_size,
26824 bool dynamic_check,
26827 rtx_code_label *loop_label = NULL, *label;
26830 int prolog_size = 0;
26833 /* Chose proper value to copy. */
26834 if (issetmem && VECTOR_MODE_P (mode))
26835 mode_value = vec_value;
26837 mode_value = value;
26838 gcc_assert (GET_MODE_SIZE (mode) <= size);
26840 /* See if block is big or small, handle small blocks. */
26841 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
26844 loop_label = gen_label_rtx ();
26847 *done_label = gen_label_rtx ();
26849 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
26853 /* Handle sizes > 3. */
26854 for (;size2 > 2; size2 >>= 1)
26855 expand_small_movmem_or_setmem (destmem, srcmem,
26859 size2, *done_label, issetmem);
26860 /* Nothing to copy? Jump to DONE_LABEL if so */
26861 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
26864 /* Do a byte copy. */
26865 destmem = change_address (destmem, QImode, *destptr);
26867 emit_move_insn (destmem, gen_lowpart (QImode, value));
26870 srcmem = change_address (srcmem, QImode, *srcptr);
26871 emit_move_insn (destmem, srcmem);
26874 /* Handle sizes 2 and 3. */
26875 label = ix86_expand_aligntest (*count, 2, false);
26876 destmem = change_address (destmem, HImode, *destptr);
26877 destmem = offset_address (destmem, *count, 1);
26878 destmem = offset_address (destmem, GEN_INT (-2), 2);
26880 emit_move_insn (destmem, gen_lowpart (HImode, value));
26883 srcmem = change_address (srcmem, HImode, *srcptr);
26884 srcmem = offset_address (srcmem, *count, 1);
26885 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
26886 emit_move_insn (destmem, srcmem);
26889 emit_label (label);
26890 LABEL_NUSES (label) = 1;
26891 emit_jump_insn (gen_jump (*done_label));
26895 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
26896 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
26898 /* Start memcpy for COUNT >= SIZE. */
26901 emit_label (loop_label);
26902 LABEL_NUSES (loop_label) = 1;
26905 /* Copy first desired_align bytes. */
26907 srcmem = change_address (srcmem, mode, *srcptr);
26908 destmem = change_address (destmem, mode, *destptr);
26909 modesize = GEN_INT (GET_MODE_SIZE (mode));
26910 for (n = 0; prolog_size < desired_align - align; n++)
26913 emit_move_insn (destmem, mode_value);
26916 emit_move_insn (destmem, srcmem);
26917 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26919 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26920 prolog_size += GET_MODE_SIZE (mode);
26924 /* Copy last SIZE bytes. */
26925 destmem = offset_address (destmem, *count, 1);
26926 destmem = offset_address (destmem,
26927 GEN_INT (-size - prolog_size),
26930 emit_move_insn (destmem, mode_value);
26933 srcmem = offset_address (srcmem, *count, 1);
26934 srcmem = offset_address (srcmem,
26935 GEN_INT (-size - prolog_size),
26937 emit_move_insn (destmem, srcmem);
26939 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
26941 destmem = offset_address (destmem, modesize, 1);
26943 emit_move_insn (destmem, mode_value);
26946 srcmem = offset_address (srcmem, modesize, 1);
26947 emit_move_insn (destmem, srcmem);
26951 /* Align destination. */
26952 if (desired_align > 1 && desired_align > align)
26954 rtx saveddest = *destptr;
26956 gcc_assert (desired_align <= size);
26957 /* Align destptr up, place it to new register. */
26958 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
26959 GEN_INT (prolog_size),
26960 NULL_RTX, 1, OPTAB_DIRECT);
26961 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
26962 REG_POINTER (*destptr) = 1;
26963 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
26964 GEN_INT (-desired_align),
26965 *destptr, 1, OPTAB_DIRECT);
26966 /* See how many bytes we skipped. */
26967 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
26969 saveddest, 1, OPTAB_DIRECT);
26970 /* Adjust srcptr and count. */
26972 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
26973 saveddest, *srcptr, 1, OPTAB_DIRECT);
26974 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
26975 saveddest, *count, 1, OPTAB_DIRECT);
26976 /* We copied at most size + prolog_size. */
26977 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
26979 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
26983 /* Our loops always round down the block size, but for dispatch to
26984 library we need precise value. */
26986 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
26987 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
26991 gcc_assert (prolog_size == 0);
26992 /* Decrease count, so we won't end up copying last word twice. */
26993 if (!CONST_INT_P (*count))
26994 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
26995 constm1_rtx, *count, 1, OPTAB_DIRECT);
26997 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
26998 (unsigned HOST_WIDE_INT)size));
27000 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
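/* Illustrative example (not part of the original code): for a known
   COUNT of 17 and SIZE 8 the computation above yields
   ROUND_DOWN (16, 8) == 16, so the main loop is sized for 16 bytes and
   the last byte is left to the epilogue instead of being copied twice.  */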
/* This function is like the previous one, except here we know how many bytes
   need to be copied.  That allows us to update alignment not only of DST, which
   is returned, but also of SRC, which is passed as a pointer for that
   reason.  */
static rtx
27010 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
27011 rtx srcreg, rtx value, rtx vec_value,
27012 int desired_align, int align_bytes,
27016 rtx orig_dst = dst;
27017 rtx orig_src = NULL;
27018 int piece_size = 1;
27019 int copied_bytes = 0;
27023 gcc_assert (srcp != NULL);
27028 for (piece_size = 1;
27029 piece_size <= desired_align && copied_bytes < align_bytes;
27032 if (align_bytes & piece_size)
27036 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
27037 dst = emit_memset (dst, destreg, vec_value, piece_size);
27039 dst = emit_memset (dst, destreg, value, piece_size);
27042 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
27043 copied_bytes += piece_size;
27046 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
27047 set_mem_align (dst, desired_align * BITS_PER_UNIT);
27048 if (MEM_SIZE_KNOWN_P (orig_dst))
27049 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
27053 int src_align_bytes = get_mem_align_offset (src, desired_align
27055 if (src_align_bytes >= 0)
27056 src_align_bytes = desired_align - src_align_bytes;
27057 if (src_align_bytes >= 0)
27059 unsigned int src_align;
27060 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
27062 if ((src_align_bytes & (src_align - 1))
27063 == (align_bytes & (src_align - 1)))
27066 if (src_align > (unsigned int) desired_align)
27067 src_align = desired_align;
27068 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
27069 set_mem_align (src, src_align * BITS_PER_UNIT);
27071 if (MEM_SIZE_KNOWN_P (orig_src))
27072 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
27079 /* Return true if ALG can be used in current context.
27080 Assume we expand memset if MEMSET is true. */
27082 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
27084 if (alg == no_stringop)
27086 if (alg == vector_loop)
27087 return TARGET_SSE || TARGET_AVX;
27088 /* Algorithms using the rep prefix want at least edi and ecx;
27089 additionally, memset wants eax and memcpy wants esi. Don't
27090 consider such algorithms if the user has appropriated those
27091 registers for their own purposes, or if we have a non-default
27092 address space, since some string insns cannot override the segment. */
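/* For reference, a minimal sketch of the implicit register contract
   assumed above (standard x86 string-insn semantics, not code emitted
   here):

	rep movsb	; copies %ecx bytes from (%esi) to (%edi)
	rep stosb	; stores %al into %ecx bytes at (%edi)

   so memcpy-style expansion needs %ecx/%esi/%edi free, while
   memset-style expansion needs %ecx/%edi/%eax free.  */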
27093 if (alg == rep_prefix_1_byte
27094 || alg == rep_prefix_4_byte
27095 || alg == rep_prefix_8_byte)
27099 if (fixed_regs[CX_REG]
27100 || fixed_regs[DI_REG]
27101 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
27107 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
27108 static enum stringop_alg
27109 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
27110 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
27111 bool memset, bool zero_memset, bool have_as,
27112 int *dynamic_check, bool *noalign, bool recur)
27114 const struct stringop_algs *algs;
27115 bool optimize_for_speed;
27117 const struct processor_costs *cost;
27119 bool any_alg_usable_p = false;
27122 *dynamic_check = -1;
27124 /* Even if the string operation call is cold, we still might spend a lot
27125 of time processing large blocks. */
27126 if (optimize_function_for_size_p (cfun)
27127 || (optimize_insn_for_size_p ()
27129 || (expected_size != -1 && expected_size < 256))))
27130 optimize_for_speed = false;
27132 optimize_for_speed = true;
27134 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
27136 algs = &cost->memset[TARGET_64BIT != 0];
27138 algs = &cost->memcpy[TARGET_64BIT != 0];
27140 /* See maximal size for user defined algorithm. */
27141 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
27143 enum stringop_alg candidate = algs->size[i].alg;
27144 bool usable = alg_usable_p (candidate, memset, have_as);
27145 any_alg_usable_p |= usable;
27147 if (candidate != libcall && candidate && usable)
27148 max = algs->size[i].max;
27151 /* If expected size is not known but max size is small enough
27152 so inline version is a win, set expected size into
27154 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
27155 && expected_size == -1)
27156 expected_size = min_size / 2 + max_size / 2;
27158 /* If user specified the algorithm, honor it if possible. */
27159 if (ix86_stringop_alg != no_stringop
27160 && alg_usable_p (ix86_stringop_alg, memset, have_as))
27161 return ix86_stringop_alg;
27162 /* rep; movq or rep; movl is the smallest variant. */
27163 else if (!optimize_for_speed)
27166 if (!count || (count & 3) || (memset && !zero_memset))
27167 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
27168 ? rep_prefix_1_byte : loop_1_byte;
27170 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
27171 ? rep_prefix_4_byte : loop;
/* Very tiny blocks are best handled via the loop, REP is expensive to
   set up.  */
27175 else if (expected_size != -1 && expected_size < 4)
27176 return loop_1_byte;
27177 else if (expected_size != -1)
27179 enum stringop_alg alg = libcall;
27180 bool alg_noalign = false;
27181 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
27183 /* We get here if the algorithms that were not libcall-based
27184 were rep-prefix based and we are unable to use rep prefixes
27185 based on global register usage. Break out of the loop and
27186 use the heuristic below. */
27187 if (algs->size[i].max == 0)
27189 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
27191 enum stringop_alg candidate = algs->size[i].alg;
27193 if (candidate != libcall
27194 && alg_usable_p (candidate, memset, have_as))
27197 alg_noalign = algs->size[i].noalign;
27199 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
27200 last non-libcall inline algorithm. */
27201 if (TARGET_INLINE_ALL_STRINGOPS)
/* When the current size is best copied by a libcall,
   but we are still forced to inline, run the heuristic below
   that will pick code for medium-sized blocks.  */
27206 if (alg != libcall)
27208 *noalign = alg_noalign;
27211 else if (!any_alg_usable_p)
27214 else if (alg_usable_p (candidate, memset, have_as))
27216 *noalign = algs->size[i].noalign;
/* When asked to inline the call anyway, try to pick a meaningful choice.
   We look for the maximal size of block that is faster to copy by hand
   and take blocks of at most that size, guessing that the average size
   will be roughly half of the block.

   If this turns out to be bad, we might simply specify the preferred
   choice in ix86_costs.  */
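/* A worked example of this heuristic (illustrative numbers only): if
   the cost table says blocks of up to 4096 bytes are profitable to
   copy inline, the code below guesses
   new_expected_size == 4096 / 2 == 2048 and re-runs decide_alg with
   it, optionally requesting a runtime size check that diverts larger
   blocks to the library call.  */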
27229 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
27230 && (algs->unknown_size == libcall
27231 || !alg_usable_p (algs->unknown_size, memset, have_as)))
27233 enum stringop_alg alg;
27234 HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
27236 /* If there aren't any usable algorithms or if recursing already,
27237 then recursing on smaller sizes or same size isn't going to
27238 find anything. Just return the simple byte-at-a-time copy loop. */
27239 if (!any_alg_usable_p || recur)
27241 /* Pick something reasonable. */
27242 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
27243 *dynamic_check = 128;
27244 return loop_1_byte;
27246 alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
27247 zero_memset, have_as, dynamic_check, noalign, true);
27248 gcc_assert (*dynamic_check == -1);
27249 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
27250 *dynamic_check = max;
27252 gcc_assert (alg != libcall);
27255 return (alg_usable_p (algs->unknown_size, memset, have_as)
27256 ? algs->unknown_size : libcall);
27259 /* Decide on alignment. We know that the operand is already aligned to ALIGN
27260 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
27262 decide_alignment (int align,
27263 enum stringop_alg alg,
27265 machine_mode move_mode)
27267 int desired_align = 0;
27269 gcc_assert (alg != no_stringop);
27271 if (alg == libcall)
27273 if (move_mode == VOIDmode)
27276 desired_align = GET_MODE_SIZE (move_mode);
/* PentiumPro has special logic triggering for 8-byte aligned blocks,
   copying a whole cache line at once.  */
27279 if (TARGET_PENTIUMPRO
27280 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
27285 if (desired_align < align)
27286 desired_align = align;
27287 if (expected_size != -1 && expected_size < 4)
27288 desired_align = align;
27290 return desired_align;
/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
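/* As an illustration (not taken literally from the code below),
   broadcasting 0xAB into SImode without a multiply takes two
   shift-and-or steps:

	reg = 0x000000AB;
	reg |= reg << 8;	-> 0x0000ABAB
	reg |= reg << 16;	-> 0xABABABAB

   DImode needs one more step with a shift by 32.  */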
27300 promote_duplicated_reg (machine_mode mode, rtx val)
27302 machine_mode valmode = GET_MODE (val);
27304 int nops = mode == DImode ? 3 : 2;
27306 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
27307 if (val == const0_rtx)
27308 return copy_to_mode_reg (mode, CONST0_RTX (mode));
27309 if (CONST_INT_P (val))
27311 HOST_WIDE_INT v = INTVAL (val) & 255;
27315 if (mode == DImode)
27316 v |= (v << 16) << 16;
27317 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
27320 if (valmode == VOIDmode)
27322 if (valmode != QImode)
27323 val = gen_lowpart (QImode, val);
27324 if (mode == QImode)
27326 if (!TARGET_PARTIAL_REG_STALL)
27328 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
27329 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
27330 <= (ix86_cost->shift_const + ix86_cost->add) * nops
27331 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
27333 rtx reg = convert_modes (mode, QImode, val, true);
27334 tmp = promote_duplicated_reg (mode, const1_rtx);
27335 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
27340 rtx reg = convert_modes (mode, QImode, val, true);
27342 if (!TARGET_PARTIAL_REG_STALL)
27343 if (mode == SImode)
27344 emit_insn (gen_insvsi_1 (reg, reg));
27346 emit_insn (gen_insvdi_1 (reg, reg));
27349 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
27350 NULL, 1, OPTAB_DIRECT);
27351 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1,
27354 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
27355 NULL, 1, OPTAB_DIRECT);
27356 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27357 if (mode == SImode)
27359 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
27360 NULL, 1, OPTAB_DIRECT);
27361 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Duplicate value VAL using promote_duplicated_reg into the maximal size that
   will be needed by the main loop copying SIZE_NEEDED chunks and by the
   prologue getting alignment from ALIGN up to DESIRED_ALIGN.  */
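/* E.g. (illustrative): a 64-bit unrolled loop storing 8-byte chunks
   (SIZE_NEEDED == 8) wants VAL promoted to DImode, while a plain byte
   loop with no extra alignment work keeps VAL unpromoted.  */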
27370 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
27376 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
27377 promoted_val = promote_duplicated_reg (DImode, val);
27378 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
27379 promoted_val = promote_duplicated_reg (SImode, val);
27380 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
27381 promoted_val = promote_duplicated_reg (HImode, val);
27383 promoted_val = val;
27385 return promoted_val;
/* Expand string move (memcpy) or store (memset) operation.  Use i386 string
   operations when profitable.  The code depends upon architecture, block size
   and alignment, but always has one of the following overall structures:
27392 Aligned move sequence:
1) Prologue guard: Conditional that jumps up to epilogues for small
   blocks that can be handled by epilogue alone.  This is faster
   but also needed for correctness, since the prologue assumes the block
   is larger than the desired alignment.
27399 Optional dynamic check for size and libcall for large
27400 blocks is emitted here too, with -minline-stringops-dynamically.
27402 2) Prologue: copy first few bytes in order to get destination
27403 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
27404 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
27405 copied. We emit either a jump tree on power of two sized
27406 blocks, or a byte loop.
27408 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
27409 with specified algorithm.
27411 4) Epilogue: code copying tail of the block that is too small to be
27412 handled by main body (or up to size guarded by prologue guard).
   Misaligned move sequence

   1) misaligned move prologue/epilogue containing:
      a) Prologue handling small memory blocks and jumping to done_label
	 (skipped if blocks are known to be large enough)
      b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
	 needed by single possibly misaligned move
	 (skipped if alignment is not needed)
      c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves

   2) Zero size guard dispatching to done_label, if needed

   3) dispatch to library call, if needed,

   4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with specified algorithm.  */
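/* A rough C-level sketch of the aligned structure above (assumed
   shapes only, not literal emitted code):

	if (count < epilogue_size_needed)
	  goto epilogue;			// 1) prologue guard
	while (dst & (desired_align - 1))
	  copy_one_piece ();			// 2) prologue
	while (count >= size_needed)
	  copy_chunk (size_needed);		// 3) main body
      epilogue:
	copy_tail (count & (epilogue_size_needed - 1));	// 4) epilogue
 */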
27431 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
27432 rtx align_exp, rtx expected_align_exp,
27433 rtx expected_size_exp, rtx min_size_exp,
27434 rtx max_size_exp, rtx probable_max_size_exp,
27439 rtx_code_label *label = NULL;
27441 rtx_code_label *jump_around_label = NULL;
27442 HOST_WIDE_INT align = 1;
27443 unsigned HOST_WIDE_INT count = 0;
27444 HOST_WIDE_INT expected_size = -1;
27445 int size_needed = 0, epilogue_size_needed;
27446 int desired_align = 0, align_bytes = 0;
27447 enum stringop_alg alg;
27448 rtx promoted_val = NULL;
27449 rtx vec_promoted_val = NULL;
27450 bool force_loopy_epilogue = false;
27452 bool need_zero_guard = false;
27454 machine_mode move_mode = VOIDmode;
27455 machine_mode wider_mode;
27456 int unroll_factor = 1;
27457 /* TODO: Once value ranges are available, fill in proper data. */
27458 unsigned HOST_WIDE_INT min_size = 0;
27459 unsigned HOST_WIDE_INT max_size = -1;
27460 unsigned HOST_WIDE_INT probable_max_size = -1;
27461 bool misaligned_prologue_used = false;
27464 if (CONST_INT_P (align_exp))
27465 align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
27467 if (CONST_INT_P (expected_align_exp)
27468 && INTVAL (expected_align_exp) > align)
27469 align = INTVAL (expected_align_exp);
27470 /* ALIGN is the minimum of destination and source alignment, but we care here
27471 just about destination alignment. */
27473 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
27474 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
27476 if (CONST_INT_P (count_exp))
27478 min_size = max_size = probable_max_size = count = expected_size
27479 = INTVAL (count_exp);
27480 /* When COUNT is 0, there is nothing to do. */
27487 min_size = INTVAL (min_size_exp);
27489 max_size = INTVAL (max_size_exp);
27490 if (probable_max_size_exp)
27491 probable_max_size = INTVAL (probable_max_size_exp);
27492 if (CONST_INT_P (expected_size_exp))
27493 expected_size = INTVAL (expected_size_exp);
27496 /* Make sure we don't need to care about overflow later on. */
27497 if (count > (HOST_WIDE_INT_1U << 30))
27500 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
27502 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
27504 /* Step 0: Decide on preferred algorithm, desired alignment and
27505 size of chunks to be copied by main loop. */
27506 alg = decide_alg (count, expected_size, min_size, probable_max_size,
27508 issetmem && val_exp == const0_rtx, have_as,
27509 &dynamic_check, &noalign, false);
27512 fprintf (dump_file, "Selected stringop expansion strategy: %s\n",
27513 stringop_alg_names[alg]);
27515 if (alg == libcall)
27517 gcc_assert (alg != no_stringop);
/* For now vector-version of memset is generated only for memory zeroing, as
   creating a promoted vector value is very cheap in this case.  */
27521 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
27522 alg = unrolled_loop;
27525 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
27526 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
27528 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
27531 move_mode = word_mode;
27537 gcc_unreachable ();
27539 need_zero_guard = true;
27540 move_mode = QImode;
27543 need_zero_guard = true;
27545 case unrolled_loop:
27546 need_zero_guard = true;
27547 unroll_factor = (TARGET_64BIT ? 4 : 2);
27550 need_zero_guard = true;
27552 /* Find the widest supported mode. */
27553 move_mode = word_mode;
27554 while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
27555 && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
27556 move_mode = wider_mode;
27558 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128)
27559 move_mode = TImode;
27561 /* Find the corresponding vector mode with the same size as MOVE_MODE.
27562 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
27563 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
27565 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
27566 if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
27567 || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
27568 move_mode = word_mode;
27570 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
27572 case rep_prefix_8_byte:
27573 move_mode = DImode;
27575 case rep_prefix_4_byte:
27576 move_mode = SImode;
27578 case rep_prefix_1_byte:
27579 move_mode = QImode;
27582 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
27583 epilogue_size_needed = size_needed;
/* If we are going to call any library calls conditionally, make sure any
   pending stack adjustments happen before the first conditional branch,
   otherwise they will be emitted before the library call only and won't
   happen from the other branches.  */
27589 if (dynamic_check != -1)
27590 do_pending_stack_adjust ();
27592 desired_align = decide_alignment (align, alg, expected_size, move_mode);
27593 if (!TARGET_ALIGN_STRINGOPS || noalign)
27594 align = desired_align;
27596 /* Step 1: Prologue guard. */
27598 /* Alignment code needs count to be in register. */
27599 if (CONST_INT_P (count_exp) && desired_align > align)
27601 if (INTVAL (count_exp) > desired_align
27602 && INTVAL (count_exp) > size_needed)
27605 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
27606 if (align_bytes <= 0)
27609 align_bytes = desired_align - align_bytes;
27611 if (align_bytes == 0)
27612 count_exp = force_reg (counter_mode (count_exp), count_exp);
27614 gcc_assert (desired_align >= 1 && align >= 1);
/* Misaligned move sequences handle both prologue and epilogue at once.
   Default code generation results in smaller code for large alignments
   and also avoids redundant work when sizes are known precisely.  */
27619 misaligned_prologue_used
27620 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
27621 && MAX (desired_align, epilogue_size_needed) <= 32
27622 && desired_align <= epilogue_size_needed
27623 && ((desired_align > align && !align_bytes)
27624 || (!count && epilogue_size_needed > 1)));
/* Do the cheap promotion to allow better CSE across the
   main loop and epilogue (i.e. one load of the big constant in the
   beginning).
   For now the misaligned move sequences do not have a fast path
   without broadcasting.  */
27631 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
27633 if (alg == vector_loop)
27635 gcc_assert (val_exp == const0_rtx);
27636 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
27637 promoted_val = promote_duplicated_reg_to_size (val_exp,
27638 GET_MODE_SIZE (word_mode),
27639 desired_align, align);
27643 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27644 desired_align, align);
/* Misaligned move sequences handle both prologues and epilogues at once.
   Default code generation results in smaller code for large alignments and
   also avoids redundant work when sizes are known precisely.  */
27650 if (misaligned_prologue_used)
/* The misaligned move prologue handles small blocks by itself.  */
27653 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
27654 (dst, src, &destreg, &srcreg,
27655 move_mode, promoted_val, vec_promoted_val,
27657 &jump_around_label,
27658 desired_align < align
27659 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
27660 desired_align, align, &min_size, dynamic_check, issetmem);
27662 src = change_address (src, BLKmode, srcreg);
27663 dst = change_address (dst, BLKmode, destreg);
27664 set_mem_align (dst, desired_align * BITS_PER_UNIT);
27665 epilogue_size_needed = 0;
27666 if (need_zero_guard
27667 && min_size < (unsigned HOST_WIDE_INT) size_needed)
/* It is possible that we copied enough so the main loop will not
   operate.  */
gcc_assert (size_needed > 1);
27672 if (jump_around_label == NULL_RTX)
27673 jump_around_label = gen_label_rtx ();
27674 emit_cmp_and_jump_insns (count_exp,
27675 GEN_INT (size_needed),
27676 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
27677 if (expected_size == -1
27678 || expected_size < (desired_align - align) / 2 + size_needed)
27679 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27681 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27684 /* Ensure that alignment prologue won't copy past end of block. */
27685 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
27687 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
27688 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
27689 Make sure it is power of 2. */
27690 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
/* To improve performance of small blocks, we jump around the VAL
   promoting mode.  This means that if the promoted VAL is not constant,
   we might not use it in the epilogue and have to use the byte
   loop variant.  */
27696 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
27697 force_loopy_epilogue = true;
27698 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27699 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27701 /* If main algorithm works on QImode, no epilogue is needed.
27702 For small sizes just don't align anything. */
27703 if (size_needed == 1)
27704 desired_align = align;
27709 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27711 label = gen_label_rtx ();
27712 emit_cmp_and_jump_insns (count_exp,
27713 GEN_INT (epilogue_size_needed),
27714 LTU, 0, counter_mode (count_exp), 1, label);
27715 if (expected_size == -1 || expected_size < epilogue_size_needed)
27716 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27718 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Emit code to decide at runtime whether a library call or inline code
   should be used.  */
27724 if (dynamic_check != -1)
27726 if (!issetmem && CONST_INT_P (count_exp))
27728 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
27730 emit_block_copy_via_libcall (dst, src, count_exp);
27731 count_exp = const0_rtx;
27737 rtx_code_label *hot_label = gen_label_rtx ();
27738 if (jump_around_label == NULL_RTX)
27739 jump_around_label = gen_label_rtx ();
27740 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
27741 LEU, 0, counter_mode (count_exp),
27743 predict_jump (REG_BR_PROB_BASE * 90 / 100);
27745 set_storage_via_libcall (dst, count_exp, val_exp);
27747 emit_block_copy_via_libcall (dst, src, count_exp);
27748 emit_jump (jump_around_label);
27749 emit_label (hot_label);
27753 /* Step 2: Alignment prologue. */
27754 /* Do the expensive promotion once we branched off the small blocks. */
27755 if (issetmem && !promoted_val)
27756 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27757 desired_align, align);
27759 if (desired_align > align && !misaligned_prologue_used)
27761 if (align_bytes == 0)
/* Except for the first move in prologue, we no longer know
   constant offset in aliasing info.  It doesn't seem worth
   the pain to maintain it for the first move, so throw away
   the info early.  */
27767 dst = change_address (dst, BLKmode, destreg);
27769 src = change_address (src, BLKmode, srcreg);
27770 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
27771 promoted_val, vec_promoted_val,
27772 count_exp, align, desired_align,
27774 /* At most desired_align - align bytes are copied. */
27775 if (min_size < (unsigned)(desired_align - align))
27778 min_size -= desired_align - align;
27782 /* If we know how many bytes need to be stored before dst is
27783 sufficiently aligned, maintain aliasing info accurately. */
27784 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
27792 count_exp = plus_constant (counter_mode (count_exp),
27793 count_exp, -align_bytes);
27794 count -= align_bytes;
27795 min_size -= align_bytes;
27796 max_size -= align_bytes;
27798 if (need_zero_guard
27799 && min_size < (unsigned HOST_WIDE_INT) size_needed
27800 && (count < (unsigned HOST_WIDE_INT) size_needed
27801 || (align_bytes == 0
27802 && count < ((unsigned HOST_WIDE_INT) size_needed
27803 + desired_align - align))))
/* It is possible that we copied enough so the main loop will not
   operate.  */
gcc_assert (size_needed > 1);
27808 if (label == NULL_RTX)
27809 label = gen_label_rtx ();
27810 emit_cmp_and_jump_insns (count_exp,
27811 GEN_INT (size_needed),
27812 LTU, 0, counter_mode (count_exp), 1, label);
27813 if (expected_size == -1
27814 || expected_size < (desired_align - align) / 2 + size_needed)
27815 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27817 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27820 if (label && size_needed == 1)
27822 emit_label (label);
27823 LABEL_NUSES (label) = 1;
27825 epilogue_size_needed = 1;
27827 promoted_val = val_exp;
27829 else if (label == NULL_RTX && !misaligned_prologue_used)
27830 epilogue_size_needed = size_needed;
27832 /* Step 3: Main loop. */
27839 gcc_unreachable ();
27842 case unrolled_loop:
27843 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
27844 count_exp, move_mode, unroll_factor,
27845 expected_size, issetmem);
27848 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
27849 vec_promoted_val, count_exp, move_mode,
27850 unroll_factor, expected_size, issetmem);
27852 case rep_prefix_8_byte:
27853 case rep_prefix_4_byte:
27854 case rep_prefix_1_byte:
27855 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
27856 val_exp, count_exp, move_mode, issetmem);
/* Properly adjust the offsets of src and dest memory for aliasing.  */
27860 if (CONST_INT_P (count_exp))
27863 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
27864 (count / size_needed) * size_needed);
27865 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
27866 (count / size_needed) * size_needed);
27871 src = change_address (src, BLKmode, srcreg);
27872 dst = change_address (dst, BLKmode, destreg);
27875 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might hold original count,
   while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   Epilogue code will actually copy
   COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.  Compensate if needed.  */
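/* Example (illustrative): for an original COUNT of 13 and SIZE_NEEDED
   of 8, the masking below computes 13 & 7 == 5, the number of tail
   bytes left for the epilogue.  */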
27884 if (size_needed < epilogue_size_needed)
27886 tmp = expand_simple_binop (counter_mode (count_exp), AND, count_exp,
27887 GEN_INT (size_needed - 1), count_exp, 1,
27889 if (tmp != count_exp)
27890 emit_move_insn (count_exp, tmp);
27892 emit_label (label);
27893 LABEL_NUSES (label) = 1;
27896 if (count_exp != const0_rtx && epilogue_size_needed > 1)
27898 if (force_loopy_epilogue)
27899 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
27900 epilogue_size_needed);
27904 expand_setmem_epilogue (dst, destreg, promoted_val,
27905 vec_promoted_val, count_exp,
27906 epilogue_size_needed);
27908 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
27909 epilogue_size_needed);
27912 if (jump_around_label)
27913 emit_label (jump_around_label);
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

27921 out = result, initialized with the start address
27922 align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined
27926 This is just the body. It needs the initializations mentioned above and
27927 some address computing at the end. These things are done in i386.md. */
27930 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
27934 rtx_code_label *align_2_label = NULL;
27935 rtx_code_label *align_3_label = NULL;
27936 rtx_code_label *align_4_label = gen_label_rtx ();
27937 rtx_code_label *end_0_label = gen_label_rtx ();
27939 rtx tmpreg = gen_reg_rtx (SImode);
27940 rtx scratch = gen_reg_rtx (SImode);
27944 if (CONST_INT_P (align_rtx))
27945 align = INTVAL (align_rtx);
27947 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
27949 /* Is there a known alignment and is it less than 4? */
27952 rtx scratch1 = gen_reg_rtx (Pmode);
27953 emit_move_insn (scratch1, out);
27954 /* Is there a known alignment and is it not 2? */
27957 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
27958 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
27960 /* Leave just the 3 lower bits. */
27961 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
27962 NULL_RTX, 0, OPTAB_WIDEN);
27964 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
27965 Pmode, 1, align_4_label);
27966 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
27967 Pmode, 1, align_2_label);
27968 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
27969 Pmode, 1, align_3_label);
/* Since the alignment is 2, we have to check 2 or 0 bytes;
   check if it is aligned to 4 bytes.  */
27976 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
27977 NULL_RTX, 0, OPTAB_WIDEN);
27979 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
27980 Pmode, 1, align_4_label);
27983 mem = change_address (src, QImode, out);
27985 /* Now compare the bytes. */
/* Compare the first n unaligned bytes on a byte-by-byte basis.  */
27988 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
27989 QImode, 1, end_0_label);
27991 /* Increment the address. */
27992 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27994 /* Not needed with an alignment of 2 */
27997 emit_label (align_2_label);
27999 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
28002 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
28004 emit_label (align_3_label);
28007 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
28010 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
/* Generate loop to check 4 bytes at a time.  It is not a good idea to
   align this loop.  It only makes programs huge, but does not help to
   speed them up.  */
28016 emit_label (align_4_label);
28018 mem = change_address (src, SImode, out);
28019 emit_move_insn (scratch, mem);
28020 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
   This saves three branches inside the loop and many cycles.  */
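/* Written out in C, the insns emitted below compute the classic
   "byte equals zero" test (illustration only):

	tmp = (x - 0x01010101) & ~x & 0x80808080;

   e.g. x == 0x41004141 gives tmp == 0x00800000: the borrow out of the
   zero byte sets its sign bit, while & ~x discards bytes >= 0x80.  */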
28025 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
28026 emit_insn (gen_one_cmplsi2 (scratch, scratch));
28027 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
28028 emit_insn (gen_andsi3 (tmpreg, tmpreg,
28029 gen_int_mode (0x80808080, SImode)));
28030 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
28035 rtx reg = gen_reg_rtx (SImode);
28036 rtx reg2 = gen_reg_rtx (Pmode);
28037 emit_move_insn (reg, tmpreg);
28038 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
28040 /* If zero is not in the first two bytes, move two bytes forward. */
28041 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
28042 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28043 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
28044 emit_insn (gen_rtx_SET (tmpreg,
28045 gen_rtx_IF_THEN_ELSE (SImode, tmp,
28048 /* Emit lea manually to avoid clobbering of flags. */
28049 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
28051 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28052 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
28053 emit_insn (gen_rtx_SET (out,
28054 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
28060 rtx_code_label *end_2_label = gen_label_rtx ();
28061 /* Is zero in the first two bytes? */
28063 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
28064 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28065 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
28066 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28067 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
28069 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
28070 JUMP_LABEL (tmp) = end_2_label;
28072 /* Not in the first two. Move two bytes forward. */
28073 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
28074 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
28076 emit_label (end_2_label);
28080 /* Avoid branch in fixing the byte. */
28081 tmpreg = gen_lowpart (QImode, tmpreg);
28082 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
28083 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
28084 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
28085 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
28087 emit_label (end_0_label);
28090 /* Expand strlen. */
28093 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
28095 rtx addr, scratch1, scratch2, scratch3, scratch4;
/* The generic case of strlen expander is long.  Avoid expanding it
   unless TARGET_INLINE_ALL_STRINGOPS.  */
28100 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
28101 && !TARGET_INLINE_ALL_STRINGOPS
28102 && !optimize_insn_for_size_p ()
28103 && (!CONST_INT_P (align) || INTVAL (align) < 4))
28106 addr = force_reg (Pmode, XEXP (src, 0));
28107 scratch1 = gen_reg_rtx (Pmode);
28109 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
28110 && !optimize_insn_for_size_p ())
/* Well it seems that some optimizer does not combine a call like
   foo(strlen(bar), strlen(bar));
   when the move and the subtraction are done here.  It does calculate
   the length just once when these instructions are done inside of
   output_strlen_unroll().  But I think since &bar[strlen(bar)] is
   often used and I use one fewer register for the lifetime of
   output_strlen_unroll() this is better.  */
28120 emit_move_insn (out, addr);
28122 ix86_expand_strlensi_unroll_1 (out, src, align);
28124 /* strlensi_unroll_1 returns the address of the zero at the end of
28125 the string, like memchr(), so compute the length by subtracting
28126 the start address. */
28127 emit_insn (ix86_gen_sub3 (out, out, addr));
28133 /* Can't use this if the user has appropriated eax, ecx, or edi. */
28134 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
28136 /* Can't use this for non-default address spaces. */
28137 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
28140 scratch2 = gen_reg_rtx (Pmode);
28141 scratch3 = gen_reg_rtx (Pmode);
28142 scratch4 = force_reg (Pmode, constm1_rtx);
28144 emit_move_insn (scratch3, addr);
28145 eoschar = force_reg (QImode, eoschar);
28147 src = replace_equiv_address_nv (src, scratch3);
28149 /* If .md starts supporting :P, this can be done in .md. */
28150 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
28151 scratch4), UNSPEC_SCAS);
28152 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
28153 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
28154 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
/* For a given symbol (function) construct code to compute the address of
   its PLT entry in the large x86-64 PIC model.  */
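/* The emitted sequence is roughly (illustration; pic_reg stands for
   whatever register currently holds the GOT base):

	movabs	$func@PLTOFF, %tmp
	add	pic_reg, %tmp

   leaving the absolute address of func's PLT entry in %tmp.  */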
28162 construct_plt_address (rtx symbol)
28166 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
28167 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
28168 gcc_assert (Pmode == DImode);
28170 tmp = gen_reg_rtx (Pmode);
28171 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
28173 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
28174 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
28179 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
28181 rtx pop, bool sibcall)
28184 rtx use = NULL, call;
28185 unsigned int vec_len = 0;
28188 if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
28190 fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
28192 && (lookup_attribute ("interrupt",
28193 TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))
error ("interrupt service routine cannot be called directly");
28197 fndecl = NULL_TREE;
28199 if (pop == const0_rtx)
28201 gcc_assert (!TARGET_64BIT || !pop);
28203 if (TARGET_MACHO && !TARGET_64BIT)
28206 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
28207 fnaddr = machopic_indirect_call_target (fnaddr);
28212 /* Static functions and indirect calls don't need the pic register. Also,
28213 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
28214 it an indirect call. */
28215 rtx addr = XEXP (fnaddr, 0);
28217 && GET_CODE (addr) == SYMBOL_REF
28218 && !SYMBOL_REF_LOCAL_P (addr))
28221 && (SYMBOL_REF_DECL (addr) == NULL_TREE
28222 || !lookup_attribute ("noplt",
28223 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
28226 || (ix86_cmodel == CM_LARGE_PIC
28227 && DEFAULT_ABI != MS_ABI))
28229 use_reg (&use, gen_rtx_REG (Pmode,
28230 REAL_PIC_OFFSET_TABLE_REGNUM));
28231 if (ix86_use_pseudo_pic_reg ())
28232 emit_move_insn (gen_rtx_REG (Pmode,
28233 REAL_PIC_OFFSET_TABLE_REGNUM),
28234 pic_offset_table_rtx);
28237 else if (!TARGET_PECOFF && !TARGET_MACHO)
28241 fnaddr = gen_rtx_UNSPEC (Pmode,
28242 gen_rtvec (1, addr),
28244 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
28248 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
28250 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
28251 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
28254 fnaddr = gen_const_mem (Pmode, fnaddr);
28255 /* Pmode may not be the same as word_mode for x32, which
28256 doesn't support indirect branch via 32-bit memory slot.
28257 Since x32 GOT slot is 64 bit with zero upper 32 bits,
28258 indirect branch via x32 GOT slot is OK. */
28259 if (GET_MODE (fnaddr) != word_mode)
28260 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
28261 fnaddr = gen_rtx_MEM (QImode, fnaddr);
28266 /* Skip setting up RAX register for -mskip-rax-setup when there are no
28267 parameters passed in vector registers. */
28269 && (INTVAL (callarg2) > 0
28270 || (INTVAL (callarg2) == 0
28271 && (TARGET_SSE || !flag_skip_rax_setup))))
28273 rtx al = gen_rtx_REG (QImode, AX_REG);
28274 emit_move_insn (al, callarg2);
28275 use_reg (&use, al);
28278 if (ix86_cmodel == CM_LARGE_PIC
28281 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
28282 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
28283 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
28284 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
28285 branch via x32 GOT slot is OK. */
28286 else if (!(TARGET_X32
28288 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
28289 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
28291 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
28292 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
28294 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
28295 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
28298 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
28301 call = gen_rtx_SET (retval, call);
28302 vec[vec_len++] = call;
28306 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
28307 pop = gen_rtx_SET (stack_pointer_rtx, pop);
28308 vec[vec_len++] = pop;
28311 if (cfun->machine->no_caller_saved_registers
28313 || (!TREE_THIS_VOLATILE (fndecl)
28314 && !lookup_attribute ("no_caller_saved_registers",
28315 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))))
28317 static const char ix86_call_used_regs[] = CALL_USED_REGISTERS;
28318 bool is_64bit_ms_abi = (TARGET_64BIT
28319 && ix86_function_abi (fndecl) == MS_ABI);
28320 char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi);
28322 /* If there are no caller-saved registers, add all registers
28323 that are clobbered by the call which returns. */
28324 for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
28326 && (ix86_call_used_regs[i] == 1
28327 || (ix86_call_used_regs[i] & c_mask))
28328 && !STACK_REGNO_P (i)
28329 && !MMX_REGNO_P (i))
28331 gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i));
28333 else if (TARGET_64BIT_MS_ABI
28334 && (!callarg2 || INTVAL (callarg2) != -2))
28338 for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++)
28340 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
28341 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
28343 clobber_reg (&use, gen_rtx_REG (mode, regno));
28346 /* Set here, but it may get cleared later. */
28347 if (TARGET_CALL_MS2SYSV_XLOGUES)
28352 /* Don't break hot-patched functions. */
28353 else if (ix86_function_ms_hook_prologue (current_function_decl))
28356 /* TODO: Cases not yet examined. */
28357 else if (flag_split_stack)
28358 warn_once_call_ms2sysv_xlogues ("-fsplit-stack");
28362 gcc_assert (!reload_completed);
28363 cfun->machine->call_ms2sysv = true;
28369 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
28370 rtx_insn *call_insn = emit_call_insn (call);
28372 CALL_INSN_FUNCTION_USAGE (call_insn) = use;
28377 /* Return true if the function being called was marked with attribute
28378 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
28379 to handle the non-PIC case in the backend because there is no easy
28380 interface for the front-end to force non-PLT calls to use the GOT.
28381 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
28382 to call the function marked "noplt" indirectly. */
28385 ix86_nopic_noplt_attribute_p (rtx call_op)
28387 if (flag_pic || ix86_cmodel == CM_LARGE
28388 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
28389 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
28390 || SYMBOL_REF_LOCAL_P (call_op))
28393 tree symbol_decl = SYMBOL_REF_DECL (call_op);
28396 || (symbol_decl != NULL_TREE
28397 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
28403 /* Output indirect branch via a call and return thunk. CALL_OP is a
28404 register which contains the branch target. XASM is the assembly
28405 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
28406 A normal call is converted to:
28408 call __x86_indirect_thunk_reg
28410 and a tail call is converted to:
28412 jmp __x86_indirect_thunk_reg
28416 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
28418 char thunk_name_buf[32];
28420 enum indirect_thunk_prefix need_prefix
28421 = indirect_thunk_need_prefix (current_output_insn);
28422 int regno = REGNO (call_op);
28424 if (cfun->machine->indirect_branch_type
28425 != indirect_branch_thunk_inline)
28427 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
28430 if (i >= FIRST_REX_INT_REG)
28431 i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
28432 indirect_thunks_used |= 1 << i;
28434 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
28435 thunk_name = thunk_name_buf;
28442 if (thunk_name != NULL)
28443 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28445 output_indirect_thunk (regno);
28449 if (thunk_name != NULL)
28451 fprintf (asm_out_file, "\tcall\t%s\n", thunk_name);
28455 char indirectlabel1[32];
28456 char indirectlabel2[32];
28458 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
28460 indirectlabelno++);
28461 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
28463 indirectlabelno++);
28466 fputs ("\tjmp\t", asm_out_file);
28467 assemble_name_raw (asm_out_file, indirectlabel2);
28468 fputc ('\n', asm_out_file);
28470 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
28472 if (thunk_name != NULL)
28473 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28475 output_indirect_thunk (regno);
28477 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
28480 fputs ("\tcall\t", asm_out_file);
28481 assemble_name_raw (asm_out_file, indirectlabel1);
28482 fputc ('\n', asm_out_file);
/* Output indirect branch via a call and return thunk.  CALL_OP is
   the branch target.  XASM is the assembly template for CALL_OP.
   Branch is a tail call if SIBCALL_P is true.  A normal call is
   converted to:

	jmp L2
   L1:
	push CALL_OP
	jmp __x86_indirect_thunk
   L2:
	call L1

   and a tail call is converted to:

	push CALL_OP
	jmp __x86_indirect_thunk
 */
static void
28505 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
28508 char thunk_name_buf[32];
28511 enum indirect_thunk_prefix need_prefix
28512 = indirect_thunk_need_prefix (current_output_insn);
28515 if (cfun->machine->indirect_branch_type
28516 != indirect_branch_thunk_inline)
28518 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
28519 indirect_thunk_needed = true;
28520 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
28521 thunk_name = thunk_name_buf;
28526 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
28527 TARGET_64BIT ? 'q' : 'l', xasm);
28531 output_asm_insn (push_buf, &call_op);
28532 if (thunk_name != NULL)
28533 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28535 output_indirect_thunk (regno);
28539 char indirectlabel1[32];
28540 char indirectlabel2[32];
28542 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
28544 indirectlabelno++);
28545 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
28547 indirectlabelno++);
28550 fputs ("\tjmp\t", asm_out_file);
28551 assemble_name_raw (asm_out_file, indirectlabel2);
28552 fputc ('\n', asm_out_file);
28554 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
28556 /* An external function may be called via GOT, instead of PLT. */
28557 if (MEM_P (call_op))
28559 struct ix86_address parts;
28560 rtx addr = XEXP (call_op, 0);
28561 if (ix86_decompose_address (addr, &parts)
28562 && parts.base == stack_pointer_rtx)
28564 /* Since call will adjust stack by -UNITS_PER_WORD,
28565 we must convert "disp(stack, index, scale)" to
28566 "disp+UNITS_PER_WORD(stack, index, scale)". */
28569 addr = gen_rtx_MULT (Pmode, parts.index,
28570 GEN_INT (parts.scale));
28571 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28575 addr = stack_pointer_rtx;
28578 if (parts.disp != NULL_RTX)
28579 disp = plus_constant (Pmode, parts.disp,
28582 disp = GEN_INT (UNITS_PER_WORD);
28584 addr = gen_rtx_PLUS (Pmode, addr, disp);
28585 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
28589 output_asm_insn (push_buf, &call_op);
28591 if (thunk_name != NULL)
28592 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28594 output_indirect_thunk (regno);
28596 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
28599 fputs ("\tcall\t", asm_out_file);
28600 assemble_name_raw (asm_out_file, indirectlabel1);
28601 fputc ('\n', asm_out_file);
28605 /* Output indirect branch via a call and return thunk. CALL_OP is
28606 the branch target. XASM is the assembly template for CALL_OP.
28607 Branch is a tail call if SIBCALL_P is true. */
28610 ix86_output_indirect_branch (rtx call_op, const char *xasm,
28613 if (REG_P (call_op))
28614 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
28616 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
28619 /* Output indirect jump. CALL_OP is the jump target. */
28622 ix86_output_indirect_jmp (rtx call_op)
28624 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
/* We can't have red-zone since "call" in the indirect thunk
   pushes the return address onto the stack, destroying the red-zone.  */
28628 if (ix86_red_zone_size != 0)
28629 gcc_unreachable ();
28631 ix86_output_indirect_branch (call_op, "%0", true);
28635 return "%!jmp\t%A0";
28638 /* Output return instrumentation for current function if needed. */
28641 output_return_instrumentation (void)
28643 if (ix86_instrument_return != instrument_return_none
28645 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
28647 if (ix86_flag_record_return)
28648 fprintf (asm_out_file, "1:\n");
28649 switch (ix86_instrument_return)
28651 case instrument_return_call:
28652 fprintf (asm_out_file, "\tcall\t__return__\n");
28654 case instrument_return_nop5:
28655 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
28656 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
28658 case instrument_return_none:
28662 if (ix86_flag_record_return)
28664 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
28665 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
28666 fprintf (asm_out_file, "\t.previous\n");
/* Output function return.  Add a REP prefix to RET if LONG_P is true
   and function return is kept.  */
28675 ix86_output_function_return (bool long_p)
28677 output_return_instrumentation ();
28679 if (cfun->machine->function_return_type != indirect_branch_keep)
28681 char thunk_name[32];
28682 enum indirect_thunk_prefix need_prefix
28683 = indirect_thunk_need_prefix (current_output_insn);
28685 if (cfun->machine->function_return_type
28686 != indirect_branch_thunk_inline)
28688 bool need_thunk = (cfun->machine->function_return_type
28689 == indirect_branch_thunk);
28690 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
28692 indirect_return_needed |= need_thunk;
28693 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28696 output_indirect_thunk (INVALID_REGNUM);
28704 return "rep%; ret";
/* Output indirect function return.  RET_OP is the function return
   value.  */
static const char *
28711 ix86_output_indirect_function_return (rtx ret_op)
28713 if (cfun->machine->function_return_type != indirect_branch_keep)
28715 char thunk_name[32];
28716 enum indirect_thunk_prefix need_prefix
28717 = indirect_thunk_need_prefix (current_output_insn);
28718 unsigned int regno = REGNO (ret_op);
28719 gcc_assert (regno == CX_REG);
28721 if (cfun->machine->function_return_type
28722 != indirect_branch_thunk_inline)
28724 bool need_thunk = (cfun->machine->function_return_type
28725 == indirect_branch_thunk);
28726 indirect_thunk_name (thunk_name, regno, need_prefix, true);
28730 indirect_return_via_cx = true;
28731 indirect_thunks_used |= 1 << CX_REG;
28733 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28736 output_indirect_thunk (regno);
28741 return "%!jmp\t%A0";
/* Split simple return with popping POPC bytes from stack to indirect
   branch with stack adjustment.  */
28748 ix86_split_simple_return_pop_internal (rtx popc)
28750 struct machine_function *m = cfun->machine;
28751 rtx ecx = gen_rtx_REG (SImode, CX_REG);
28754 /* There is no "pascal" calling convention in any 64bit ABI. */
28755 gcc_assert (!TARGET_64BIT);
28757 insn = emit_insn (gen_pop (ecx));
28758 m->fs.cfa_offset -= UNITS_PER_WORD;
28759 m->fs.sp_offset -= UNITS_PER_WORD;
28761 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
28762 x = gen_rtx_SET (stack_pointer_rtx, x);
28763 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
28764 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
28765 RTX_FRAME_RELATED_P (insn) = 1;
28767 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, popc);
28768 x = gen_rtx_SET (stack_pointer_rtx, x);
28769 insn = emit_insn (x);
28770 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
28771 RTX_FRAME_RELATED_P (insn) = 1;
28773 /* Now return address is in ECX. */
28774 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
28777 /* Output the assembly for a call instruction. */
28780 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
28782 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
28783 bool output_indirect_p
28785 && cfun->machine->indirect_branch_type != indirect_branch_keep);
28786 bool seh_nop_p = false;
28789 if (SIBLING_CALL_P (insn))
28791 output_return_instrumentation ();
28794 if (ix86_nopic_noplt_attribute_p (call_op))
28799 if (output_indirect_p)
28800 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
28802 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
28806 if (output_indirect_p)
28807 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
28809 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
28813 xasm = "%!jmp\t%P0";
28815 /* SEH epilogue detection requires the indirect branch case
28816 to include REX.W. */
28817 else if (TARGET_SEH)
28818 xasm = "%!rex.W jmp\t%A0";
28821 if (output_indirect_p)
28824 xasm = "%!jmp\t%A0";
28827 if (output_indirect_p && !direct_p)
28828 ix86_output_indirect_branch (call_op, xasm, true);
28830 output_asm_insn (xasm, &call_op);
28834 /* SEH unwinding can require an extra nop to be emitted in several
28835 circumstances. Determine if we have one of those. */
28840 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
28842 /* Prevent a catch region from being adjacent to a jump that would
28843 be interpreted as an epilogue sequence by the unwinder. */
28844 if (JUMP_P(i) && CROSSING_JUMP_P (i))
28850 /* If we get to another real insn, we don't need the nop. */
28854 /* If we get to the epilogue note, prevent a catch region from
28855 being adjacent to the standard epilogue sequence. If non-
28856 call-exceptions, we'll have done this during epilogue emission. */
28857 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
28858 && !flag_non_call_exceptions
28859 && !can_throw_internal (insn))
28866 /* If we didn't find a real insn following the call, prevent the
28867 unwinder from looking into the next function. */
28874 if (ix86_nopic_noplt_attribute_p (call_op))
28879 if (output_indirect_p)
28880 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
28882 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
28886 if (output_indirect_p)
28887 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
28889 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
28893 xasm = "%!call\t%P0";
28897 if (output_indirect_p)
28900 xasm = "%!call\t%A0";
28903 if (output_indirect_p && !direct_p)
28904 ix86_output_indirect_branch (call_op, xasm, false);
28906 output_asm_insn (xasm, &call_op);
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
28918 static struct machine_function *
28919 ix86_init_machine_status (void)
28921 struct machine_function *f;
28923 f = ggc_cleared_alloc<machine_function> ();
28924 f->call_abi = ix86_abi;
28929 /* Return a MEM corresponding to a stack slot with mode MODE.
28930 Allocate a new slot if necessary.
28932 The RTL for a function can have several slots available: N is
28933 which slot to use. */
28936 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
28938 struct stack_local_entry *s;
28940 gcc_assert (n < MAX_386_STACK_LOCALS);
28942 for (s = ix86_stack_locals; s; s = s->next)
28943 if (s->mode == mode && s->n == n)
28944 return validize_mem (copy_rtx (s->rtl));
28946 s = ggc_alloc<stack_local_entry> ();
28949 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
28951 s->next = ix86_stack_locals;
28952 ix86_stack_locals = s;
28953 return validize_mem (copy_rtx (s->rtl));
28957 ix86_instantiate_decls (void)
28959 struct stack_local_entry *s;
28961 for (s = ix86_stack_locals; s; s = s->next)
28962 if (s->rtl != NULL_RTX)
28963 instantiate_decl_rtl (s->rtl);
28966 /* Check whether x86 address PARTS is a pc-relative address. */
28969 ix86_rip_relative_addr_p (struct ix86_address *parts)
28971 rtx base, index, disp;
28973 base = parts->base;
28974 index = parts->index;
28975 disp = parts->disp;
28977 if (disp && !base && !index)
28983 if (GET_CODE (disp) == CONST)
28984 symbol = XEXP (disp, 0);
28985 if (GET_CODE (symbol) == PLUS
28986 && CONST_INT_P (XEXP (symbol, 1)))
28987 symbol = XEXP (symbol, 0);
28989 if (GET_CODE (symbol) == LABEL_REF
28990 || (GET_CODE (symbol) == SYMBOL_REF
28991 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
28992 || (GET_CODE (symbol) == UNSPEC
28993 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
28994 || XINT (symbol, 1) == UNSPEC_PCREL
28995 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
29002 /* Calculate the length of the memory address in the instruction encoding.
29003 Includes addr32 prefix, does not include the one-byte modrm, opcode,
29004 or other prefixes. We never generate addr32 prefix for LEA insn. */
29007 memory_address_length (rtx addr, bool lea)
29009 struct ix86_address parts;
29010 rtx base, index, disp;
29014 if (GET_CODE (addr) == PRE_DEC
29015 || GET_CODE (addr) == POST_INC
29016 || GET_CODE (addr) == PRE_MODIFY
29017 || GET_CODE (addr) == POST_MODIFY)
29020 ok = ix86_decompose_address (addr, &parts);
29023 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
29025 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
29026 if (TARGET_64BIT && !lea
29027 && (SImode_address_operand (addr, VOIDmode)
29028 || (parts.base && GET_MODE (parts.base) == SImode)
29029 || (parts.index && GET_MODE (parts.index) == SImode)))
29033 index = parts.index;
29036 if (base && SUBREG_P (base))
29037 base = SUBREG_REG (base);
29038 if (index && SUBREG_P (index))
29039 index = SUBREG_REG (index);
29041 gcc_assert (base == NULL_RTX || REG_P (base));
29042 gcc_assert (index == NULL_RTX || REG_P (index));
29045 - esp as the base always wants an index,
29046 - ebp as the base always wants a displacement,
29047 - r12 as the base always wants an index,
29048 - r13 as the base always wants a displacement. */
29050 /* Register Indirect. */
29051 if (base && !index && !disp)
29053 /* esp (for its index) and ebp (for its displacement) need
29054 the two-byte modrm form. Similarly for r12 and r13 in 64-bit mode. */
29056 if (base == arg_pointer_rtx
29057 || base == frame_pointer_rtx
29058 || REGNO (base) == SP_REG
29059 || REGNO (base) == BP_REG
29060 || REGNO (base) == R12_REG
29061 || REGNO (base) == R13_REG)
29065 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
29066 is not disp32, but disp32(%rip), so for disp32
29067 SIB byte is needed, unless print_operand_address
29068 optimizes it into disp32(%rip) or (%rip) is implied by UNSPEC. */
29070 else if (disp && !base && !index)
29073 if (!ix86_rip_relative_addr_p (&parts))
29078 /* Find the length of the displacement constant. */
29081 if (base && satisfies_constraint_K (disp))
29086 /* ebp always wants a displacement. Similarly r13. */
29087 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
29090 /* An index requires the two-byte modrm form.... */
29092 /* ...like esp (or r12), which always wants an index. */
29093 || base == arg_pointer_rtx
29094 || base == frame_pointer_rtx
29095 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
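/* A minimal standalone sketch (illustrative addition, not used by GCC) of
   the ModRM/SIB/displacement length accounting above.  BASE_LOW3 is the
   low three bits of the hardware register number, so 4 covers esp/r12 and
   5 covers ebp/r13; like memory_address_length, the result excludes the
   ModRM byte, opcode and prefixes.  */
static int
sketch_mem_encoding_len (int base_low3, bool has_index, int disp)
{
  int len = 0;
  if (has_index || base_low3 == 4)
    len += 1;				/* SIB byte required.  */
  if (disp == 0 && base_low3 != 5)
    ;					/* No displacement byte.  */
  else if (disp >= -128 && disp <= 127)
    len += 1;				/* disp8.  */
  else
    len += 4;				/* disp32.  */
  return len;
}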
29102 /* Compute default value for "length_immediate" attribute. When SHORTFORM
29103 is set, expect that the insn has an 8-bit immediate alternative. */
29105 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
29109 extract_insn_cached (insn);
29110 for (i = recog_data.n_operands - 1; i >= 0; --i)
29111 if (CONSTANT_P (recog_data.operand[i]))
29113 enum attr_mode mode = get_attr_mode (insn);
29116 if (shortform && CONST_INT_P (recog_data.operand[i]))
29118 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
29125 ival = trunc_int_for_mode (ival, HImode);
29128 ival = trunc_int_for_mode (ival, SImode);
29133 if (IN_RANGE (ival, -128, 127))
29150 /* Immediates for DImode instructions are encoded
29151 as 32-bit sign-extended values. */
29156 fatal_insn ("unknown insn mode", insn);
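/* Standalone sketch (illustrative addition) of the immediate-length rule
   above: a value in the signed 8-bit range uses the 1-byte short form
   when the insn has one; otherwise the immediate occupies the operand
   width, capped at 4 bytes because DImode immediates are encoded as
   32-bit sign-extended values.  */
static int
sketch_imm_len (HOST_WIDE_INT ival, int opsize_bytes, bool shortform)
{
  if (shortform && IN_RANGE (ival, -128, 127))
    return 1;					/* imm8.  */
  return opsize_bytes > 4 ? 4 : opsize_bytes;	/* imm16/imm32.  */
}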
29162 /* Compute default value for "length_address" attribute. */
29164 ix86_attr_length_address_default (rtx_insn *insn)
29168 if (get_attr_type (insn) == TYPE_LEA)
29170 rtx set = PATTERN (insn), addr;
29172 if (GET_CODE (set) == PARALLEL)
29173 set = XVECEXP (set, 0, 0);
29175 gcc_assert (GET_CODE (set) == SET);
29177 addr = SET_SRC (set);
29179 return memory_address_length (addr, true);
29182 extract_insn_cached (insn);
29183 for (i = recog_data.n_operands - 1; i >= 0; --i)
29185 rtx op = recog_data.operand[i];
29188 constrain_operands_cached (insn, reload_completed);
29189 if (which_alternative != -1)
29191 const char *constraints = recog_data.constraints[i];
29192 int alt = which_alternative;
29194 while (*constraints == '=' || *constraints == '+')
29197 while (*constraints++ != ',')
29199 /* Skip ignored operands. */
29200 if (*constraints == 'X')
29204 int len = memory_address_length (XEXP (op, 0), false);
29206 /* Account for segment prefix for non-default addr spaces. */
29207 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
29216 /* Compute default value for "length_vex" attribute. It includes
29217 a 2- or 3-byte VEX prefix and 1 opcode byte. */
29220 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
29225 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit requires
29226 the 3-byte VEX prefix. */
29227 if (!has_0f_opcode || has_vex_w)
29230 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
29234 extract_insn_cached (insn);
29236 for (i = recog_data.n_operands - 1; i >= 0; --i)
29237 if (REG_P (recog_data.operand[i]))
29239 /* The REX.W bit requires the 3-byte VEX prefix. */
29240 if (GET_MODE (recog_data.operand[i]) == DImode
29241 && GENERAL_REG_P (recog_data.operand[i]))
29246 /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
29247 if (MEM_P (recog_data.operand[i])
29248 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
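/* Sketch (illustrative addition) of the decision above; the returned
   length includes the opcode byte, matching the "length_vex"
   attribute.  */
static int
sketch_vex_len (bool has_0f_opcode, bool has_vex_w, bool needs_rex_wxb)
{
  if (!has_0f_opcode || has_vex_w || needs_rex_wxb)
    return 3 + 1;			/* 3-byte VEX prefix + opcode.  */
  return 2 + 1;				/* 2-byte VEX prefix + opcode.  */
}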
29257 ix86_class_likely_spilled_p (reg_class_t);
29259 /* Return true if the lhs of INSN is a HW function argument register, and
29260 set IS_SPILLED to true if it is a likely-spilled HW register. */
29262 insn_is_function_arg (rtx insn, bool* is_spilled)
29266 if (!NONDEBUG_INSN_P (insn))
29268 /* Call instructions are not movable; ignore them. */
29271 insn = PATTERN (insn);
29272 if (GET_CODE (insn) == PARALLEL)
29273 insn = XVECEXP (insn, 0, 0);
29274 if (GET_CODE (insn) != SET)
29276 dst = SET_DEST (insn);
29277 if (REG_P (dst) && HARD_REGISTER_P (dst)
29278 && ix86_function_arg_regno_p (REGNO (dst)))
29280 /* Is it a likely-spilled HW register? */
29281 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
29282 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
29283 *is_spilled = true;
29289 /* Add output dependencies for a chain of adjacent function arguments, but only
29290 if there is a move to a likely-spilled HW register. Return the first argument
29291 if at least one dependence was added, or NULL otherwise. */
29293 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
29296 rtx_insn *last = call;
29297 rtx_insn *first_arg = NULL;
29298 bool is_spilled = false;
29300 head = PREV_INSN (head);
29302 /* Find the argument-passing instruction nearest to the call. */
29305 last = PREV_INSN (last);
29308 if (!NONDEBUG_INSN_P (last))
29310 if (insn_is_function_arg (last, &is_spilled))
29318 insn = PREV_INSN (last);
29319 if (!INSN_P (insn))
29323 if (!NONDEBUG_INSN_P (insn))
29328 if (insn_is_function_arg (insn, &is_spilled))
29330 /* Add an output dependence between two function arguments if the chain
29331 of output arguments contains likely-spilled HW registers. */
29333 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
29334 first_arg = last = insn;
29344 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict its code motion. */
29347 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
29352 set = single_set (insn);
29355 tmp = SET_DEST (set);
29358 /* Add output dependency to the first function argument. */
29359 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
29362 /* Add anti dependency. */
29363 add_dependence (first_arg, insn, REG_DEP_ANTI);
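/* Worked example (illustrative addition): the edges added above force the
   scheduler to keep INSN and the argument chain headed by FIRST_ARG in
   program order, so a write (output dependence) or use (anti dependence)
   of an argument register cannot be reordered across the argument setup
   that feeds the call.  */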
29366 /* Avoid cross-block motion of a function argument by adding a dependency
29367 from the first non-jump instruction in bb. */
29369 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
29371 rtx_insn *insn = BB_END (bb);
29375 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
29377 rtx set = single_set (insn);
29380 avoid_func_arg_motion (arg, insn);
29384 if (insn == BB_HEAD (bb))
29386 insn = PREV_INSN (insn);
29390 /* Hook for pre-reload schedule - avoid motion of function arguments
29391 passed in likely spilled HW registers. */
29393 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
29396 rtx_insn *first_arg = NULL;
29397 if (reload_completed)
29399 while (head != tail && DEBUG_INSN_P (head))
29400 head = NEXT_INSN (head);
29401 for (insn = tail; insn != head; insn = PREV_INSN (insn))
29402 if (INSN_P (insn) && CALL_P (insn))
29404 first_arg = add_parameter_dependencies (insn, head);
29407 /* Add a dependee for the first argument to predecessors, but only if the
29408 region contains more than one block. */
29409 basic_block bb = BLOCK_FOR_INSN (insn);
29410 int rgn = CONTAINING_RGN (bb->index);
29411 int nr_blks = RGN_NR_BLOCKS (rgn);
29412 /* Skip trivial regions and region head blocks that can have
29413 predecessors outside of the region. */
29414 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
29419 /* Regions are SCCs with the exception of selective
29420 scheduling with pipelining of outer blocks enabled.
29421 So also check that immediate predecessors of a non-head
29422 block are in the same region. */
29423 FOR_EACH_EDGE (e, ei, bb->preds)
29425 /* Avoid creating loop-carried dependencies by using the
29426 topological ordering in the region. */
29427 if (rgn == CONTAINING_RGN (e->src->index)
29428 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
29429 add_dependee_for_func_arg (first_arg, e->src);
29437 else if (first_arg)
29438 avoid_func_arg_motion (first_arg, insn);
29441 /* Hook for pre-reload schedule - set the priority of moves from likely-spilled
29442 HW registers to the maximum, to schedule them as soon as possible. These are
29443 moves from function argument registers at the top of the function entry
29444 and moves from function return value registers after a call. */
29446 ix86_adjust_priority (rtx_insn *insn, int priority)
29450 if (reload_completed)
29453 if (!NONDEBUG_INSN_P (insn))
29456 set = single_set (insn);
29459 rtx tmp = SET_SRC (set);
29461 && HARD_REGISTER_P (tmp)
29462 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
29463 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
29464 return current_sched_info->sched_max_insns_priority;
29470 /* Prepare for scheduling pass. */
29472 ix86_sched_init_global (FILE *, int, int)
29474 /* Install scheduling hooks for current CPU. Some of these hooks are used
29475 in time-critical parts of the scheduler, so we only set them up when
29476 they are actually used. */
29479 case PROCESSOR_CORE2:
29480 case PROCESSOR_NEHALEM:
29481 case PROCESSOR_SANDYBRIDGE:
29482 case PROCESSOR_HASWELL:
29483 case PROCESSOR_GENERIC:
29484 /* Do not perform multipass scheduling for pre-reload schedule
29485 to save compile time. */
29486 if (reload_completed)
29488 ix86_core2i7_init_hooks ();
29491 /* Fall through. */
29493 targetm.sched.dfa_post_advance_cycle = NULL;
29494 targetm.sched.first_cycle_multipass_init = NULL;
29495 targetm.sched.first_cycle_multipass_begin = NULL;
29496 targetm.sched.first_cycle_multipass_issue = NULL;
29497 targetm.sched.first_cycle_multipass_backtrack = NULL;
29498 targetm.sched.first_cycle_multipass_end = NULL;
29499 targetm.sched.first_cycle_multipass_fini = NULL;
29505 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
29507 static HOST_WIDE_INT
29508 ix86_static_rtx_alignment (machine_mode mode)
29510 if (mode == DFmode)
29512 if (ALIGN_MODE_128 (mode))
29513 return MAX (128, GET_MODE_ALIGNMENT (mode));
29514 return GET_MODE_ALIGNMENT (mode);
29517 /* Implement TARGET_CONSTANT_ALIGNMENT. */
29519 static HOST_WIDE_INT
29520 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
29522 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
29523 || TREE_CODE (exp) == INTEGER_CST)
29525 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
29526 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
29527 return MAX (mode_align, align);
29529 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
29530 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
29531 return BITS_PER_WORD;
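/* Worked example (illustrative addition): at -O2 a string constant of
   length 31 or more is raised to BITS_PER_WORD alignment here so block
   moves can copy it a word at a time, while a DFmode REAL_CST is raised
   to 64-bit alignment through ix86_static_rtx_alignment above.  */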
29536 /* Implement TARGET_EMPTY_RECORD_P. */
29539 ix86_is_empty_record (const_tree type)
29543 return default_is_empty_record (type);
29546 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
29549 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
29551 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
29553 if (!cum->warn_empty)
29556 if (!TYPE_EMPTY_P (type))
29559 const_tree ctx = get_ultimate_context (cum->decl);
29560 if (ctx != NULL_TREE
29561 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
29564 /* If the actual size of the type is zero, then there is no change
29565 in how objects of this size are passed. */
29566 if (int_size_in_bytes (type) == 0)
29569 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
29570 "changes in -fabi-version=12 (GCC 8)", type);
29572 /* Only warn once. */
29573 cum->warn_empty = false;
29576 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
29577 the data type, and ALIGN is the alignment that the object would
29578 ordinarily have. */
29581 iamcu_alignment (tree type, int align)
29585 if (align < 32 || TYPE_USER_ALIGN (type))
29588 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4 bytes. */
29590 mode = TYPE_MODE (strip_array_types (type));
29591 switch (GET_MODE_CLASS (mode))
29594 case MODE_COMPLEX_INT:
29595 case MODE_COMPLEX_FLOAT:
29597 case MODE_DECIMAL_FLOAT:
29604 /* Compute the alignment for a static variable.
29605 TYPE is the data type, and ALIGN is the alignment that
29606 the object would ordinarily have. The value of this function is used
29607 instead of that alignment to align the object. */
29610 ix86_data_alignment (tree type, int align, bool opt)
29612 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
29613 for symbols from other compilation units or symbols that don't need
29614 to bind locally. In order to preserve some ABI compatibility with
29615 those compilers, ensure we don't decrease alignment from what we used to assume. */
29618 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
29620 /* A data structure, equal to or greater than the size of a cache line
29621 (64 bytes in the Pentium 4 and other recent Intel processors, including
29622 processors based on the Intel Core microarchitecture) should be aligned
29623 so that its base address is a multiple of the cache-line size. */
29626 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
29628 if (max_align < BITS_PER_WORD)
29629 max_align = BITS_PER_WORD;
29631 switch (ix86_align_data_type)
29633 case ix86_align_data_type_abi: opt = false; break;
29634 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
29635 case ix86_align_data_type_cacheline: break;
29639 align = iamcu_alignment (type, align);
29642 && AGGREGATE_TYPE_P (type)
29643 && TYPE_SIZE (type)
29644 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
29646 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
29647 && align < max_align_compat)
29648 align = max_align_compat;
29649 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
29650 && align < max_align)
29654 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
29655 to a 16-byte boundary. */
29658 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
29659 && TYPE_SIZE (type)
29660 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29661 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
29669 if (TREE_CODE (type) == ARRAY_TYPE)
29671 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29673 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29676 else if (TREE_CODE (type) == COMPLEX_TYPE)
29679 if (TYPE_MODE (type) == DCmode && align < 64)
29681 if ((TYPE_MODE (type) == XCmode
29682 || TYPE_MODE (type) == TCmode) && align < 128)
29685 else if ((TREE_CODE (type) == RECORD_TYPE
29686 || TREE_CODE (type) == UNION_TYPE
29687 || TREE_CODE (type) == QUAL_UNION_TYPE)
29688 && TYPE_FIELDS (type))
29690 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29692 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29695 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29696 || TREE_CODE (type) == INTEGER_TYPE)
29698 if (TYPE_MODE (type) == DFmode && align < 64)
29700 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
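/* Worked example (illustrative addition, default -malign-data=cacheline):
   an aggregate whose size reaches a cache line is aligned so its base
   address is a multiple of the cache-line size; a 16-byte-or-larger
   array gets at least 128-bit alignment for SSE; a plain double is
   raised to 64-bit alignment.  */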
29707 /* Compute the alignment for a local variable or a stack slot. EXP is
29708 the data type or decl itself, MODE is the widest mode available and
29709 ALIGN is the alignment that the object would ordinarily have. The
29710 value of this macro is used instead of that alignment to align the
29714 ix86_local_alignment (tree exp, machine_mode mode,
29715 unsigned int align)
29719 if (exp && DECL_P (exp))
29721 type = TREE_TYPE (exp);
29730 /* Don't do dynamic stack realignment for long long objects with
29731 -mpreferred-stack-boundary=2. */
29734 && ix86_preferred_stack_boundary < 64
29735 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
29736 && (!type || !TYPE_USER_ALIGN (type))
29737 && (!decl || !DECL_USER_ALIGN (decl)))
29740 /* If TYPE is NULL, we are allocating a stack slot for caller-save
29741 register in MODE. We will return the largest alignment of XF and DF modes. */
29745 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
29746 align = GET_MODE_ALIGNMENT (DFmode);
29750 /* Don't increase alignment for Intel MCU psABI. */
29754 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
29755 to a 16-byte boundary. Exact wording is:
29757 An array uses the same alignment as its elements, except that a local or
29758 global array variable of length at least 16 bytes or
29759 a C99 variable-length array variable always has alignment of at least 16 bytes.
29761 This was added to allow use of aligned SSE instructions on arrays. This
29762 rule is meant for static storage (where the compiler cannot do the analysis
29763 by itself). We follow it for automatic variables only when convenient.
29764 We fully control everything in the function being compiled, and functions
29765 from other units cannot rely on the alignment.
29767 Exclude the va_list type. It is the common case of a local array where
29768 we cannot benefit from the alignment.
29770 TODO: Probably one should optimize for size only when var is not escaping. */
29771 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
29774 if (AGGREGATE_TYPE_P (type)
29775 && (va_list_type_node == NULL_TREE
29776 || (TYPE_MAIN_VARIANT (type)
29777 != TYPE_MAIN_VARIANT (va_list_type_node)))
29778 && TYPE_SIZE (type)
29779 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29780 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
29784 if (TREE_CODE (type) == ARRAY_TYPE)
29786 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29788 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29791 else if (TREE_CODE (type) == COMPLEX_TYPE)
29793 if (TYPE_MODE (type) == DCmode && align < 64)
29795 if ((TYPE_MODE (type) == XCmode
29796 || TYPE_MODE (type) == TCmode) && align < 128)
29799 else if ((TREE_CODE (type) == RECORD_TYPE
29800 || TREE_CODE (type) == UNION_TYPE
29801 || TREE_CODE (type) == QUAL_UNION_TYPE)
29802 && TYPE_FIELDS (type))
29804 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29806 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29809 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29810 || TREE_CODE (type) == INTEGER_TYPE)
29813 if (TYPE_MODE (type) == DFmode && align < 64)
29815 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29821 /* Compute the minimum required alignment for dynamic stack realignment
29822 purposes for a local variable, parameter or a stack slot. EXP is
29823 the data type or decl itself, MODE is its mode and ALIGN is the
29824 alignment that the object would ordinarily have. */
29827 ix86_minimum_alignment (tree exp, machine_mode mode,
29828 unsigned int align)
29832 if (exp && DECL_P (exp))
29834 type = TREE_TYPE (exp);
29843 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
29846 /* Don't do dynamic stack realignment for long long objects with
29847 -mpreferred-stack-boundary=2. */
29848 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
29849 && (!type || !TYPE_USER_ALIGN (type))
29850 && (!decl || !DECL_USER_ALIGN (decl)))
29852 gcc_checking_assert (!TARGET_STV);
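/* Worked example (illustrative addition): with -m32
   -mpreferred-stack-boundary=2 a local "long long" reports a minimum
   alignment of 32 bits here, so the frame is not dynamically realigned
   just for that object.  */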
29859 /* Find a location for the static chain incoming to a nested function.
29860 This is a register, unless all free registers are used by arguments. */
29863 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
29869 /* We always use R10 in 64-bit mode. */
29874 const_tree fntype, fndecl;
29877 /* By default in 32-bit mode we use ECX to pass the static chain. */
29880 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
29882 fntype = TREE_TYPE (fndecl_or_type);
29883 fndecl = fndecl_or_type;
29887 fntype = fndecl_or_type;
29891 ccvt = ix86_get_callcvt (fntype);
29892 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29894 /* Fastcall functions use ecx/edx for arguments, which leaves
29895 us with EAX for the static chain.
29896 Thiscall functions use ecx for arguments, which also
29897 leaves us with EAX for the static chain. */
29900 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29902 /* Thiscall functions use ecx for arguments, which leaves
29903 us with EAX and EDX for the static chain.
29904 For ABI compatibility, we use EAX. */
29907 else if (ix86_function_regparm (fntype, fndecl) == 3)
29909 /* For regparm 3, we have no free call-clobbered registers in
29910 which to store the static chain. In order to implement this,
29911 we have the trampoline push the static chain to the stack.
29912 However, we can't push a value below the return address when
29913 we call the nested function directly, so we have to use an
29914 alternate entry point. For this we use ESI, and have the
29915 alternate entry point push ESI, so that things appear the
29916 same once we're executing the nested function. */
29919 if (fndecl == current_function_decl
29920 && !ix86_static_chain_on_stack)
29922 gcc_assert (!reload_completed);
29923 ix86_static_chain_on_stack = true;
29925 return gen_frame_mem (SImode,
29926 plus_constant (Pmode,
29927 arg_pointer_rtx, -8));
29933 return gen_rtx_REG (Pmode, regno);
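/* Summary (illustrative addition) of the choices above:

     64-bit:                 R10
     32-bit, default:        ECX
     fastcall / thiscall:    EAX
     regparm (3):            pushed on the stack, via the ESI
                             alternate entry point  */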
29936 /* Emit RTL insns to initialize the variable parts of a trampoline.
29937 FNDECL is the decl of the target address; M_TRAMP is a MEM for
29938 the trampoline, and CHAIN_VALUE is an RTX for the static chain
29939 to be passed to the target function. */
29942 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
29947 bool need_endbr = (flag_cf_protection & CF_BRANCH);
29949 fnaddr = XEXP (DECL_RTL (fndecl), 0);
29957 /* Insert ENDBR64. */
29958 mem = adjust_address (m_tramp, SImode, offset);
29959 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
29963 /* Load the function address into r11. Try to load the address using
29964 the shorter movl instead of movabs. We may want to support
29965 movq for kernel mode, but the kernel does not use trampolines at
29966 the moment. FNADDR is a 32-bit address and may not be in
29967 DImode when ptr_mode == SImode. Always use movl in this case. */
29969 if (ptr_mode == SImode
29970 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
29972 fnaddr = copy_addr_to_reg (fnaddr);
29974 mem = adjust_address (m_tramp, HImode, offset);
29975 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
29977 mem = adjust_address (m_tramp, SImode, offset + 2);
29978 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
29983 mem = adjust_address (m_tramp, HImode, offset);
29984 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
29986 mem = adjust_address (m_tramp, DImode, offset + 2);
29987 emit_move_insn (mem, fnaddr);
29991 /* Load static chain using movabs to r10. Use the shorter movl
29992 instead of movabs when ptr_mode == SImode. */
29993 if (ptr_mode == SImode)
30004 mem = adjust_address (m_tramp, HImode, offset);
30005 emit_move_insn (mem, gen_int_mode (opcode, HImode));
30007 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
30008 emit_move_insn (mem, chain_value);
30011 /* Jump to r11; the last (unused) byte is a nop, only there to
30012 pad the write out to a single 32-bit store. */
30013 mem = adjust_address (m_tramp, SImode, offset);
30014 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
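/* Illustrative byte layout (an addition, assuming the movabs forms and
   no ENDBR64; immediates shown symbolically):

     0:  49 bb <fnaddr64>	movabs $fnaddr, %r11
     10: 49 ba <chain64>	movabs $chain,  %r10
     20: 49 ff e3		jmp    *%r11
     23: 90			nop  (pads the final 32-bit store)  */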
30021 /* Depending on the static chain location, either load a register
30022 with a constant, or push the constant to the stack. All of the
30023 instructions are the same size. */
30024 chain = ix86_static_chain (fndecl, true);
30027 switch (REGNO (chain))
30030 opcode = 0xb8; break;
30032 opcode = 0xb9; break;
30034 gcc_unreachable ();
30042 /* Insert ENDBR32. */
30043 mem = adjust_address (m_tramp, SImode, offset);
30044 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
30048 mem = adjust_address (m_tramp, QImode, offset);
30049 emit_move_insn (mem, gen_int_mode (opcode, QImode));
30051 mem = adjust_address (m_tramp, SImode, offset + 1);
30052 emit_move_insn (mem, chain_value);
30055 mem = adjust_address (m_tramp, QImode, offset);
30056 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
30058 mem = adjust_address (m_tramp, SImode, offset + 1);
30060 /* Compute offset from the end of the jmp to the target function.
30061 In the case in which the trampoline stores the static chain on
30062 the stack, we need to skip the first insn which pushes the
30063 (call-saved) register static chain; this push is 1 byte. */
30065 disp = expand_binop (SImode, sub_optab, fnaddr,
30066 plus_constant (Pmode, XEXP (m_tramp, 0),
30067 offset - (MEM_P (chain) ? 1 : 0)),
30068 NULL_RTX, 1, OPTAB_DIRECT);
30069 emit_move_insn (mem, disp);
30072 gcc_assert (offset <= TRAMPOLINE_SIZE);
30074 #ifdef HAVE_ENABLE_EXECUTE_STACK
30075 #ifdef CHECK_EXECUTE_STACK_ENABLED
30076 if (CHECK_EXECUTE_STACK_ENABLED)
30078 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
30079 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
30084 ix86_allocate_stack_slots_for_args (void)
30086 /* Naked functions should not allocate stack slots for arguments. */
30087 return !ix86_function_naked (current_function_decl);
30091 ix86_warn_func_return (tree decl)
30093 /* Naked functions are implemented entirely in assembly, including the
30094 return sequence, so suppress warnings about this. */
30095 return !ix86_function_naked (decl);
30098 /* The following file contains several enumerations and data structures
30099 built from the definitions in i386-builtin-types.def. */
30101 #include "i386-builtin-types.inc"
30103 /* Table for the ix86 builtin non-function types. */
30104 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
30106 /* Retrieve an element from the above table, building some of
30107 the types lazily. */
30110 ix86_get_builtin_type (enum ix86_builtin_type tcode)
30112 unsigned int index;
30115 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
30117 type = ix86_builtin_type_tab[(int) tcode];
30121 gcc_assert (tcode > IX86_BT_LAST_PRIM);
30122 if (tcode <= IX86_BT_LAST_VECT)
30126 index = tcode - IX86_BT_LAST_PRIM - 1;
30127 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
30128 mode = ix86_builtin_type_vect_mode[index];
30130 type = build_vector_type_for_mode (itype, mode);
30136 index = tcode - IX86_BT_LAST_VECT - 1;
30137 if (tcode <= IX86_BT_LAST_PTR)
30138 quals = TYPE_UNQUALIFIED;
30140 quals = TYPE_QUAL_CONST;
30142 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
30143 if (quals != TYPE_UNQUALIFIED)
30144 itype = build_qualified_type (itype, quals);
30146 type = build_pointer_type (itype);
30149 ix86_builtin_type_tab[(int) tcode] = type;
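/* Example (illustrative addition; the enumerator names come from the
   generated i386-builtin-types.inc): a vector code such as IX86_BT_V4SF
   is built lazily with build_vector_type_for_mode on its scalar base
   type, and a pointer code then wraps its base type with
   build_pointer_type, first applying TYPE_QUAL_CONST for the
   pointer-to-const codes.  */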
30153 /* Table for the ix86 builtin function types. */
30154 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
30156 /* Retrieve an element from the above table, building some of
30157 the types lazily. */
30160 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
30164 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
30166 type = ix86_builtin_func_type_tab[(int) tcode];
30170 if (tcode <= IX86_BT_LAST_FUNC)
30172 unsigned start = ix86_builtin_func_start[(int) tcode];
30173 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
30174 tree rtype, atype, args = void_list_node;
30177 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
30178 for (i = after - 1; i > start; --i)
30180 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
30181 args = tree_cons (NULL, atype, args);
30184 type = build_function_type (rtype, args);
30188 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
30189 enum ix86_builtin_func_type icode;
30191 icode = ix86_builtin_func_alias_base[index];
30192 type = ix86_get_builtin_func_type (icode);
30195 ix86_builtin_func_type_tab[(int) tcode] = type;
30200 /* Codes for all the SSE/MMX builtins. Builtins not mentioned in any
30201 bdesc_* arrays below should come first, then builtins for each bdesc_*
30202 array in ascending order, so that we can use direct array accesses. */
30205 IX86_BUILTIN_MASKMOVQ,
30206 IX86_BUILTIN_LDMXCSR,
30207 IX86_BUILTIN_STMXCSR,
30208 IX86_BUILTIN_MASKMOVDQU,
30209 IX86_BUILTIN_PSLLDQ128,
30210 IX86_BUILTIN_CLFLUSH,
30211 IX86_BUILTIN_MONITOR,
30212 IX86_BUILTIN_MWAIT,
30213 IX86_BUILTIN_UMONITOR,
30214 IX86_BUILTIN_UMWAIT,
30215 IX86_BUILTIN_TPAUSE,
30216 IX86_BUILTIN_CLZERO,
30217 IX86_BUILTIN_CLDEMOTE,
30218 IX86_BUILTIN_VEC_INIT_V2SI,
30219 IX86_BUILTIN_VEC_INIT_V4HI,
30220 IX86_BUILTIN_VEC_INIT_V8QI,
30221 IX86_BUILTIN_VEC_EXT_V2DF,
30222 IX86_BUILTIN_VEC_EXT_V2DI,
30223 IX86_BUILTIN_VEC_EXT_V4SF,
30224 IX86_BUILTIN_VEC_EXT_V4SI,
30225 IX86_BUILTIN_VEC_EXT_V8HI,
30226 IX86_BUILTIN_VEC_EXT_V2SI,
30227 IX86_BUILTIN_VEC_EXT_V4HI,
30228 IX86_BUILTIN_VEC_EXT_V16QI,
30229 IX86_BUILTIN_VEC_SET_V2DI,
30230 IX86_BUILTIN_VEC_SET_V4SF,
30231 IX86_BUILTIN_VEC_SET_V4SI,
30232 IX86_BUILTIN_VEC_SET_V8HI,
30233 IX86_BUILTIN_VEC_SET_V4HI,
30234 IX86_BUILTIN_VEC_SET_V16QI,
30235 IX86_BUILTIN_GATHERSIV2DF,
30236 IX86_BUILTIN_GATHERSIV4DF,
30237 IX86_BUILTIN_GATHERDIV2DF,
30238 IX86_BUILTIN_GATHERDIV4DF,
30239 IX86_BUILTIN_GATHERSIV4SF,
30240 IX86_BUILTIN_GATHERSIV8SF,
30241 IX86_BUILTIN_GATHERDIV4SF,
30242 IX86_BUILTIN_GATHERDIV8SF,
30243 IX86_BUILTIN_GATHERSIV2DI,
30244 IX86_BUILTIN_GATHERSIV4DI,
30245 IX86_BUILTIN_GATHERDIV2DI,
30246 IX86_BUILTIN_GATHERDIV4DI,
30247 IX86_BUILTIN_GATHERSIV4SI,
30248 IX86_BUILTIN_GATHERSIV8SI,
30249 IX86_BUILTIN_GATHERDIV4SI,
30250 IX86_BUILTIN_GATHERDIV8SI,
30251 IX86_BUILTIN_VFMSUBSD3_MASK3,
30252 IX86_BUILTIN_VFMSUBSS3_MASK3,
30253 IX86_BUILTIN_GATHER3SIV8SF,
30254 IX86_BUILTIN_GATHER3SIV4SF,
30255 IX86_BUILTIN_GATHER3SIV4DF,
30256 IX86_BUILTIN_GATHER3SIV2DF,
30257 IX86_BUILTIN_GATHER3DIV8SF,
30258 IX86_BUILTIN_GATHER3DIV4SF,
30259 IX86_BUILTIN_GATHER3DIV4DF,
30260 IX86_BUILTIN_GATHER3DIV2DF,
30261 IX86_BUILTIN_GATHER3SIV8SI,
30262 IX86_BUILTIN_GATHER3SIV4SI,
30263 IX86_BUILTIN_GATHER3SIV4DI,
30264 IX86_BUILTIN_GATHER3SIV2DI,
30265 IX86_BUILTIN_GATHER3DIV8SI,
30266 IX86_BUILTIN_GATHER3DIV4SI,
30267 IX86_BUILTIN_GATHER3DIV4DI,
30268 IX86_BUILTIN_GATHER3DIV2DI,
30269 IX86_BUILTIN_SCATTERSIV8SF,
30270 IX86_BUILTIN_SCATTERSIV4SF,
30271 IX86_BUILTIN_SCATTERSIV4DF,
30272 IX86_BUILTIN_SCATTERSIV2DF,
30273 IX86_BUILTIN_SCATTERDIV8SF,
30274 IX86_BUILTIN_SCATTERDIV4SF,
30275 IX86_BUILTIN_SCATTERDIV4DF,
30276 IX86_BUILTIN_SCATTERDIV2DF,
30277 IX86_BUILTIN_SCATTERSIV8SI,
30278 IX86_BUILTIN_SCATTERSIV4SI,
30279 IX86_BUILTIN_SCATTERSIV4DI,
30280 IX86_BUILTIN_SCATTERSIV2DI,
30281 IX86_BUILTIN_SCATTERDIV8SI,
30282 IX86_BUILTIN_SCATTERDIV4SI,
30283 IX86_BUILTIN_SCATTERDIV4DI,
30284 IX86_BUILTIN_SCATTERDIV2DI,
30285 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30286 where all operands are 32-byte or 64-byte wide respectively. */
30287 IX86_BUILTIN_GATHERALTSIV4DF,
30288 IX86_BUILTIN_GATHERALTDIV8SF,
30289 IX86_BUILTIN_GATHERALTSIV4DI,
30290 IX86_BUILTIN_GATHERALTDIV8SI,
30291 IX86_BUILTIN_GATHER3ALTDIV16SF,
30292 IX86_BUILTIN_GATHER3ALTDIV16SI,
30293 IX86_BUILTIN_GATHER3ALTSIV4DF,
30294 IX86_BUILTIN_GATHER3ALTDIV8SF,
30295 IX86_BUILTIN_GATHER3ALTSIV4DI,
30296 IX86_BUILTIN_GATHER3ALTDIV8SI,
30297 IX86_BUILTIN_GATHER3ALTSIV8DF,
30298 IX86_BUILTIN_GATHER3ALTSIV8DI,
30299 IX86_BUILTIN_GATHER3DIV16SF,
30300 IX86_BUILTIN_GATHER3DIV16SI,
30301 IX86_BUILTIN_GATHER3DIV8DF,
30302 IX86_BUILTIN_GATHER3DIV8DI,
30303 IX86_BUILTIN_GATHER3SIV16SF,
30304 IX86_BUILTIN_GATHER3SIV16SI,
30305 IX86_BUILTIN_GATHER3SIV8DF,
30306 IX86_BUILTIN_GATHER3SIV8DI,
30307 IX86_BUILTIN_SCATTERALTSIV8DF,
30308 IX86_BUILTIN_SCATTERALTDIV16SF,
30309 IX86_BUILTIN_SCATTERALTSIV8DI,
30310 IX86_BUILTIN_SCATTERALTDIV16SI,
30311 IX86_BUILTIN_SCATTERALTSIV4DF,
30312 IX86_BUILTIN_SCATTERALTDIV8SF,
30313 IX86_BUILTIN_SCATTERALTSIV4DI,
30314 IX86_BUILTIN_SCATTERALTDIV8SI,
30315 IX86_BUILTIN_SCATTERALTSIV2DF,
30316 IX86_BUILTIN_SCATTERALTDIV4SF,
30317 IX86_BUILTIN_SCATTERALTSIV2DI,
30318 IX86_BUILTIN_SCATTERALTDIV4SI,
30319 IX86_BUILTIN_SCATTERDIV16SF,
30320 IX86_BUILTIN_SCATTERDIV16SI,
30321 IX86_BUILTIN_SCATTERDIV8DF,
30322 IX86_BUILTIN_SCATTERDIV8DI,
30323 IX86_BUILTIN_SCATTERSIV16SF,
30324 IX86_BUILTIN_SCATTERSIV16SI,
30325 IX86_BUILTIN_SCATTERSIV8DF,
30326 IX86_BUILTIN_SCATTERSIV8DI,
30327 IX86_BUILTIN_GATHERPFQPD,
30328 IX86_BUILTIN_GATHERPFDPS,
30329 IX86_BUILTIN_GATHERPFDPD,
30330 IX86_BUILTIN_GATHERPFQPS,
30331 IX86_BUILTIN_SCATTERPFDPD,
30332 IX86_BUILTIN_SCATTERPFDPS,
30333 IX86_BUILTIN_SCATTERPFQPD,
30334 IX86_BUILTIN_SCATTERPFQPS,
30336 IX86_BUILTIN_CLFLUSHOPT,
30338 IX86_BUILTIN_HUGE_VALQ,
30340 IX86_BUILTIN_NANSQ,
30341 IX86_BUILTIN_XABORT,
30342 IX86_BUILTIN_ADDCARRYX32,
30343 IX86_BUILTIN_ADDCARRYX64,
30344 IX86_BUILTIN_SBB32,
30345 IX86_BUILTIN_SBB64,
30346 IX86_BUILTIN_RDRAND16_STEP,
30347 IX86_BUILTIN_RDRAND32_STEP,
30348 IX86_BUILTIN_RDRAND64_STEP,
30349 IX86_BUILTIN_RDSEED16_STEP,
30350 IX86_BUILTIN_RDSEED32_STEP,
30351 IX86_BUILTIN_RDSEED64_STEP,
30352 IX86_BUILTIN_MONITORX,
30353 IX86_BUILTIN_MWAITX,
30354 IX86_BUILTIN_CFSTRING,
30355 IX86_BUILTIN_CPU_INIT,
30356 IX86_BUILTIN_CPU_IS,
30357 IX86_BUILTIN_CPU_SUPPORTS,
30358 IX86_BUILTIN_READ_FLAGS,
30359 IX86_BUILTIN_WRITE_FLAGS,
30361 /* All the remaining builtins are tracked in bdesc_* arrays in
30362 i386-builtin.def. Don't add any IX86_BUILTIN_* enumerators after these. */
30364 #define BDESC(mask, icode, name, code, comparison, flag) \
30366 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
30368 IX86_BUILTIN__BDESC_##kindu##_FIRST = code,
30369 #define BDESC_END(kind, next_kind)
30371 #include "i386-builtin.def"
30379 IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX,
30381 /* Now just the aliases for bdesc_* start/end. */
30382 #define BDESC(mask, icode, name, code, comparison, flag)
30383 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag)
30384 #define BDESC_END(kind, next_kind) \
30385 IX86_BUILTIN__BDESC_##kind##_LAST \
30386 = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1,
30388 #include "i386-builtin.def"
30394 /* Just to make sure there is no comma after the last enumerator. */
30395 IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST
30398 /* Table for the ix86 builtin decls. */
30399 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30401 /* Table of all of the builtin functions that are possible with different ISAs
30402 but are waiting to be built until a function is declared to use that ISA. */
30404 struct builtin_isa {
30405 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30406 HOST_WIDE_INT isa2; /* additional isa_flags this builtin is defined for */
30407 const char *name; /* function name */
30408 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30409 unsigned char const_p:1; /* true if the declaration is constant */
30410 unsigned char pure_p:1; /* true if the declaration has pure attribute */
30411 bool leaf_p; /* true if the declaration has leaf attribute */
30412 bool nothrow_p; /* true if the declaration has nothrow attribute */
30413 bool set_and_not_built_p;
30416 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30418 /* Bits that can still enable any inclusion of a builtin. */
30419 static HOST_WIDE_INT deferred_isa_values = 0;
30420 static HOST_WIDE_INT deferred_isa_values2 = 0;
30422 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30423 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30424 function decl in the ix86_builtins array. Returns the function decl or
30425 NULL_TREE, if the builtin was not added.
30427 If the front end has a special hook for builtin functions, delay adding
30428 builtin functions that aren't in the current ISA until the ISA is changed
30429 with function specific optimization. Doing so can save about 300K for the
30430 default compiler. When the builtin is expanded, check at that time whether it is valid.
30433 If the front end doesn't have a special hook, record all builtins, even if
30434 they aren't in the current instruction set, in case the user uses
30435 function specific options for a different ISA, so that we don't get scope
30436 errors if a builtin is added in the middle of a function scope. */
30439 def_builtin (HOST_WIDE_INT mask, const char *name,
30440 enum ix86_builtin_func_type tcode,
30441 enum ix86_builtins code)
30443 tree decl = NULL_TREE;
30445 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30447 ix86_builtins_isa[(int) code].isa = mask;
30449 mask &= ~OPTION_MASK_ISA_64BIT;
30451 /* Filter out the masks that are most often OR'ed together with others. */
30452 if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
30453 && mask != OPTION_MASK_ISA_AVX512VL)
30454 mask &= ~OPTION_MASK_ISA_AVX512VL;
30455 if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512BW)
30456 && mask != OPTION_MASK_ISA_AVX512BW)
30457 mask &= ~OPTION_MASK_ISA_AVX512BW;
30460 || (mask & ix86_isa_flags) != 0
30461 || (lang_hooks.builtin_function
30462 == lang_hooks.builtin_function_ext_scope))
30464 tree type = ix86_get_builtin_func_type (tcode);
30465 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30467 ix86_builtins[(int) code] = decl;
30468 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30472 /* Just a MASK where set_and_not_built_p == true can potentially
30473 include a builtin. */
30474 deferred_isa_values |= mask;
30475 ix86_builtins[(int) code] = NULL_TREE;
30476 ix86_builtins_isa[(int) code].tcode = tcode;
30477 ix86_builtins_isa[(int) code].name = name;
30478 ix86_builtins_isa[(int) code].leaf_p = false;
30479 ix86_builtins_isa[(int) code].nothrow_p = false;
30480 ix86_builtins_isa[(int) code].const_p = false;
30481 ix86_builtins_isa[(int) code].pure_p = false;
30482 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
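/* Hypothetical usage sketch (the builtin name and enumerator below are
   placeholders, not real definitions):

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
		  V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_EXAMPLE);

   With SSE2 enabled this registers the decl immediately; otherwise the
   request is parked in ix86_builtins_isa until ix86_add_new_builtins
   sees the ISA bit.  */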
30489 /* Like def_builtin, but also marks the function decl "const". */
30492 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30493 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30495 tree decl = def_builtin (mask, name, tcode, code);
30497 TREE_READONLY (decl) = 1;
30499 ix86_builtins_isa[(int) code].const_p = true;
30504 /* Like def_builtin, but also marks the function decl "pure". */
30507 def_builtin_pure (HOST_WIDE_INT mask, const char *name,
30508 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30510 tree decl = def_builtin (mask, name, tcode, code);
30512 DECL_PURE_P (decl) = 1;
30514 ix86_builtins_isa[(int) code].pure_p = true;
30519 /* Like def_builtin, but for additional isa2 flags. */
30522 def_builtin2 (HOST_WIDE_INT mask, const char *name,
30523 enum ix86_builtin_func_type tcode,
30524 enum ix86_builtins code)
30526 tree decl = NULL_TREE;
30528 if (tcode == VOID_FTYPE_UINT64)
30532 ix86_builtins_isa[(int) code].isa = OPTION_MASK_ISA_64BIT;
30534 ix86_builtins_isa[(int) code].isa2 = mask;
30537 || (mask & ix86_isa_flags2) != 0
30538 || (lang_hooks.builtin_function
30539 == lang_hooks.builtin_function_ext_scope))
30542 tree type = ix86_get_builtin_func_type (tcode);
30543 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30545 ix86_builtins[(int) code] = decl;
30546 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30550 /* Just a MASK where set_and_not_built_p == true can potentially
30551 include a builtin. */
30552 deferred_isa_values2 |= mask;
30553 ix86_builtins[(int) code] = NULL_TREE;
30554 ix86_builtins_isa[(int) code].tcode = tcode;
30555 ix86_builtins_isa[(int) code].name = name;
30556 ix86_builtins_isa[(int) code].leaf_p = false;
30557 ix86_builtins_isa[(int) code].nothrow_p = false;
30558 ix86_builtins_isa[(int) code].const_p = false;
30559 ix86_builtins_isa[(int) code].pure_p = false;
30560 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30566 /* Like def_builtin, but also marks the function decl "const". */
30569 def_builtin_const2 (HOST_WIDE_INT mask, const char *name,
30570 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30572 tree decl = def_builtin2 (mask, name, tcode, code);
30574 TREE_READONLY (decl) = 1;
30576 ix86_builtins_isa[(int) code].const_p = true;
30581 /* Add any new builtin functions for a given ISA that may not have been
30582 declared. This saves a bit of space compared to adding all of the
30583 declarations to the tree, even if we didn't use them. */
30586 ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2)
30588 isa &= ~OPTION_MASK_ISA_64BIT;
30590 if ((isa & deferred_isa_values) == 0
30591 && (isa2 & deferred_isa_values2) == 0)
30594 /* Bits in ISA value can be removed from potential isa values. */
30595 deferred_isa_values &= ~isa;
30596 deferred_isa_values2 &= ~isa2;
30599 tree saved_current_target_pragma = current_target_pragma;
30600 current_target_pragma = NULL_TREE;
30602 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30604 if (((ix86_builtins_isa[i].isa & isa) != 0
30605 || (ix86_builtins_isa[i].isa2 & isa2) != 0)
30606 && ix86_builtins_isa[i].set_and_not_built_p)
30610 /* Don't define the builtin again. */
30611 ix86_builtins_isa[i].set_and_not_built_p = false;
30613 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30614 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30615 type, i, BUILT_IN_MD, NULL,
30618 ix86_builtins[i] = decl;
30619 if (ix86_builtins_isa[i].const_p)
30620 TREE_READONLY (decl) = 1;
30621 if (ix86_builtins_isa[i].pure_p)
30622 DECL_PURE_P (decl) = 1;
30623 if (ix86_builtins_isa[i].leaf_p)
30624 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30626 if (ix86_builtins_isa[i].nothrow_p)
30627 TREE_NOTHROW (decl) = 1;
30631 current_target_pragma = saved_current_target_pragma;
30634 /* Bits for builtin_description.flag. */
30636 /* Set when we don't support the comparison natively, and should
30637 swap_comparison in order to support it. */
30638 #define BUILTIN_DESC_SWAP_OPERANDS 1
30640 struct builtin_description
30642 const HOST_WIDE_INT mask;
30643 const enum insn_code icode;
30644 const char *const name;
30645 const enum ix86_builtins code;
30646 const enum rtx_code comparison;
30650 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30651 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30652 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30653 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30654 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30655 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30656 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30657 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30658 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30659 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30660 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30661 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30662 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30663 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30664 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30665 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30666 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30667 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30668 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30669 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30670 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30671 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30672 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30673 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30674 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30675 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30676 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30677 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30678 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30679 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30680 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30681 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30682 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30683 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30684 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30685 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30686 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30687 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30688 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30689 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30690 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30691 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30692 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30693 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30694 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30695 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30696 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30697 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30698 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30699 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30700 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30701 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30703 #define BDESC(mask, icode, name, code, comparison, flag) \
30704 { mask, icode, name, code, comparison, flag },
30705 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
30706 static const struct builtin_description bdesc_##kind[] = \
30708 BDESC (mask, icode, name, code, comparison, flag)
30709 #define BDESC_END(kind, next_kind) \
30712 #include "i386-builtin.def"
30718 /* TM vector builtins. */
30720 /* Reuse the existing x86-specific `struct builtin_description' because
30721 we're lazy. Add casts to make them fit. */
30722 static const struct builtin_description bdesc_tm[] =
30724 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30725 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30726 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30727 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30728 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30729 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30730 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30732 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30733 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30734 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30735 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30736 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30737 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30738 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30740 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30741 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30742 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30743 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30744 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30745 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30746 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30748 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30749 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30750 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30753 /* Initialize the transactional memory vector load/store builtins. */
30756 ix86_init_tm_builtins (void)
30758 enum ix86_builtin_func_type ftype;
30759 const struct builtin_description *d;
30762 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30763 tree attrs_log, attrs_type_log;
30768 /* If there are no builtins defined, we must be compiling in a
30769 language without trans-mem support. */
30770 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30773 /* Use whatever attributes a normal TM load has. */
30774 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30775 attrs_load = DECL_ATTRIBUTES (decl);
30776 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30777 /* Use whatever attributes a normal TM store has. */
30778 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30779 attrs_store = DECL_ATTRIBUTES (decl);
30780 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30781 /* Use whatever attributes a normal TM log has. */
30782 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30783 attrs_log = DECL_ATTRIBUTES (decl);
30784 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30786 for (i = 0, d = bdesc_tm;
30787 i < ARRAY_SIZE (bdesc_tm);
30790 if ((d->mask & ix86_isa_flags) != 0
30791 || (lang_hooks.builtin_function
30792 == lang_hooks.builtin_function_ext_scope))
30794 tree type, attrs, attrs_type;
30795 enum built_in_function code = (enum built_in_function) d->code;
30797 ftype = (enum ix86_builtin_func_type) d->flag;
30798 type = ix86_get_builtin_func_type (ftype);
30800 if (BUILTIN_TM_LOAD_P (code))
30802 attrs = attrs_load;
30803 attrs_type = attrs_type_load;
30805 else if (BUILTIN_TM_STORE_P (code))
30807 attrs = attrs_store;
30808 attrs_type = attrs_type_store;
30813 attrs_type = attrs_type_log;
30815 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30816 /* The builtin without the prefix for
30817 calling it directly. */
30818 d->name + strlen ("__builtin_"),
30820 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30821 set the TYPE_ATTRIBUTES. */
30822 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30824 set_builtin_decl (code, decl, false);
30829 /* Macros for verification of enum ix86_builtins order. */
30830 #define BDESC_VERIFY(x, y, z) \
30831 gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z)))
30832 #define BDESC_VERIFYS(x, y, z) \
30833 STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z)))
30835 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
30836 IX86_BUILTIN__BDESC_COMI_LAST, 1);
30837 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
30838 IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1);
30839 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
30840 IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1);
30841 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
30842 IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1);
30843 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
30844 IX86_BUILTIN__BDESC_ARGS_LAST, 1);
30845 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_FIRST,
30846 IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1);
30847 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
30848 IX86_BUILTIN__BDESC_ARGS2_LAST, 1);
30849 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
30850 IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST, 1);
30851 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST,
30852 IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1);
30853 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
30854 IX86_BUILTIN__BDESC_CET_LAST, 1);
30855 BDESC_VERIFYS (IX86_BUILTIN_MAX,
30856 IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1);
30858 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30859 in the current target ISA to allow the user to compile particular modules
30860 with different target specific options that differ from the command-line options. */
30863 ix86_init_mmx_sse_builtins (void)
30865 const struct builtin_description * d;
30866 enum ix86_builtin_func_type ftype;
30869 /* Add all special builtins with variable number of operands. */
30870 for (i = 0, d = bdesc_special_args;
30871 i < ARRAY_SIZE (bdesc_special_args);
30874 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i);
30878 ftype = (enum ix86_builtin_func_type) d->flag;
30879 def_builtin (d->mask, d->name, ftype, d->code);
30881 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST,
30882 IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
30883 ARRAY_SIZE (bdesc_special_args) - 1);
30885 /* Add all special builtins with variable number of operands. */
30886 for (i = 0, d = bdesc_special_args2;
30887 i < ARRAY_SIZE (bdesc_special_args2);
30890 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST, i);
30894 ftype = (enum ix86_builtin_func_type) d->flag;
30895 def_builtin2 (d->mask, d->name, ftype, d->code);
30897 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST,
30898 IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
30899 ARRAY_SIZE (bdesc_special_args2) - 1);
30901 /* Add all builtins with variable number of operands. */
30902 for (i = 0, d = bdesc_args;
30903 i < ARRAY_SIZE (bdesc_args);
30906 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i);
30910 ftype = (enum ix86_builtin_func_type) d->flag;
30911 def_builtin_const (d->mask, d->name, ftype, d->code);
30913 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST,
30914 IX86_BUILTIN__BDESC_ARGS_FIRST,
30915 ARRAY_SIZE (bdesc_args) - 1);
30917 /* Add all builtins with variable number of operands. */
30918 for (i = 0, d = bdesc_args2;
30919 i < ARRAY_SIZE (bdesc_args2);
30922 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS2_FIRST, i);
30926 ftype = (enum ix86_builtin_func_type) d->flag;
30927 def_builtin_const2 (d->mask, d->name, ftype, d->code);
30929 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_LAST,
30930 IX86_BUILTIN__BDESC_ARGS2_FIRST,
30931 ARRAY_SIZE (bdesc_args2) - 1);
30933 /* Add all builtins with rounding. */
30934 for (i = 0, d = bdesc_round_args;
30935 i < ARRAY_SIZE (bdesc_round_args);
30938 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i);
30942 ftype = (enum ix86_builtin_func_type) d->flag;
30943 def_builtin_const (d->mask, d->name, ftype, d->code);
30945 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST,
30946 IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
30947 ARRAY_SIZE (bdesc_round_args) - 1);
30949 /* pcmpestr[im] insns. */
30950 for (i = 0, d = bdesc_pcmpestr;
30951 i < ARRAY_SIZE (bdesc_pcmpestr);
30954 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i);
30955 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30956 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30958 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30959 def_builtin_const (d->mask, d->name, ftype, d->code);
30961 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST,
30962 IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
30963 ARRAY_SIZE (bdesc_pcmpestr) - 1);
30965 /* pcmpistr[im] insns. */
30966 for (i = 0, d = bdesc_pcmpistr;
30967 i < ARRAY_SIZE (bdesc_pcmpistr);
30970 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i);
30971 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30972 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30974 ftype = INT_FTYPE_V16QI_V16QI_INT;
30975 def_builtin_const (d->mask, d->name, ftype, d->code);
30977 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST,
30978 IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
30979 ARRAY_SIZE (bdesc_pcmpistr) - 1);
30981 /* comi/ucomi insns. */
30982 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30984 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i);
30985 if (d->mask == OPTION_MASK_ISA_SSE2)
30986 ftype = INT_FTYPE_V2DF_V2DF;
30988 ftype = INT_FTYPE_V4SF_V4SF;
30989 def_builtin_const (d->mask, d->name, ftype, d->code);
30991 BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST,
30992 IX86_BUILTIN__BDESC_COMI_FIRST,
30993 ARRAY_SIZE (bdesc_comi) - 1);
30996 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30997 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30998 def_builtin_pure (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30999 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
31001 /* SSE or 3DNow!A */
31002 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
31003 /* As it uses V4HImode, we have to require -mmmx too. */
31004 | OPTION_MASK_ISA_MMX,
31005 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
31006 IX86_BUILTIN_MASKMOVQ);
31009 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
31010 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
31012 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
31013 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
31014 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
31015 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
31018 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
31019 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
31020 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
31021 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
31024 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31025 "__builtin_ia32_aesenc128",
31026 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
31027 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31028 "__builtin_ia32_aesenclast128",
31029 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
31030 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31031 "__builtin_ia32_aesdec128",
31032 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
31033 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31034 "__builtin_ia32_aesdeclast128",
31035 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
31036 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31037 "__builtin_ia32_aesimc128",
31038 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
31039 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31040 "__builtin_ia32_aeskeygenassist128",
31041 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
31044 def_builtin_const (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2,
31045 "__builtin_ia32_pclmulqdq128",
31046 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
31049 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
31050 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
31051 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
31052 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
31053 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
31054 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
31055 IX86_BUILTIN_RDRAND64_STEP);
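/* Usage sketch (editorial): the *_step builtins return nonzero on
   success and store the random value through the pointer, so callers
   typically retry until the hardware delivers a value (compile with
   -mrdrnd):

     unsigned int
     get_random (void)
     {
       unsigned int r;
       while (!__builtin_ia32_rdrand32_step (&r))
         continue;
       return r;
     }

   <immintrin.h>'s _rdrand32_step is a thin wrapper around this.  */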
31058 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
31059 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
31060 IX86_BUILTIN_GATHERSIV2DF);
31062 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
31063 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
31064 IX86_BUILTIN_GATHERSIV4DF);
31066 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
31067 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
31068 IX86_BUILTIN_GATHERDIV2DF);
31070 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
31071 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
31072 IX86_BUILTIN_GATHERDIV4DF);
31074 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
31075 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
31076 IX86_BUILTIN_GATHERSIV4SF);
31078 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
31079 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
31080 IX86_BUILTIN_GATHERSIV8SF);
31082 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
31083 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
31084 IX86_BUILTIN_GATHERDIV4SF);
31086 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
31087 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
31088 IX86_BUILTIN_GATHERDIV8SF);
31090 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
31091 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
31092 IX86_BUILTIN_GATHERSIV2DI);
31094 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
31095 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
31096 IX86_BUILTIN_GATHERSIV4DI);
31098 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
31099 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
31100 IX86_BUILTIN_GATHERDIV2DI);
31102 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
31103 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
31104 IX86_BUILTIN_GATHERDIV4DI);
31106 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
31107 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
31108 IX86_BUILTIN_GATHERSIV4SI);
31110 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
31111 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
31112 IX86_BUILTIN_GATHERSIV8SI);
31114 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
31115 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
31116 IX86_BUILTIN_GATHERDIV4SI);
31118 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
31119 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
31120 IX86_BUILTIN_GATHERDIV8SI);
31122 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
31123 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
31124 IX86_BUILTIN_GATHERALTSIV4DF);
31126 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv8sf ",
31127 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
31128 IX86_BUILTIN_GATHERALTDIV8SF);
31130 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
31131 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
31132 IX86_BUILTIN_GATHERALTSIV4DI);
31134 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv8si ",
31135 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
31136 IX86_BUILTIN_GATHERALTDIV8SI);
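/* Semantics sketch (editorial, pseudo-C) for the AVX2 gathers just
   defined: __builtin_ia32_gathersiv4df (src, base, idx, mask, scale)
   behaves element-wise like

     for (int i = 0; i < 4; i++)
       dst[i] = <mask element i has its sign bit set>
                ? *(const double *) ((const char *) base + idx[i] * scale)
                : src[i];

   The <immintrin.h> wrappers such as _mm256_i32gather_pd pass an
   all-ones mask to gather unconditionally.  */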
31139 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31140 V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT,
31141 IX86_BUILTIN_GATHER3SIV16SF);
31143 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31144 V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT,
31145 IX86_BUILTIN_GATHER3SIV8DF);
31147 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31148 V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT,
31149 IX86_BUILTIN_GATHER3DIV16SF);
31151 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31152 V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT,
31153 IX86_BUILTIN_GATHER3DIV8DF);
31155 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31156 V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT,
31157 IX86_BUILTIN_GATHER3SIV16SI);
31159 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31160 V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT,
31161 IX86_BUILTIN_GATHER3SIV8DI);
31163 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31164 V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT,
31165 IX86_BUILTIN_GATHER3DIV16SI);
31167 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31168 V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
31169 IX86_BUILTIN_GATHER3DIV8DI);
31171 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gather3altsiv8df ",
31172 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31173 IX86_BUILTIN_GATHER3ALTSIV8DF);
31175 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gather3altdiv16sf ",
31176 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31177 IX86_BUILTIN_GATHER3ALTDIV16SF);
31179 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gather3altsiv8di ",
31180 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31181 IX86_BUILTIN_GATHER3ALTSIV8DI);
31183 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gather3altdiv16si ",
31184 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31185 IX86_BUILTIN_GATHER3ALTDIV16SI);
31187 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31188 VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT,
31189 IX86_BUILTIN_SCATTERSIV16SF);
31191 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31192 VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT,
31193 IX86_BUILTIN_SCATTERSIV8DF);
31195 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31196 VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT,
31197 IX86_BUILTIN_SCATTERDIV16SF);
31199 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31200 VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT,
31201 IX86_BUILTIN_SCATTERDIV8DF);
31203 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31204 VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT,
31205 IX86_BUILTIN_SCATTERSIV16SI);
31207 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31208 VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT,
31209 IX86_BUILTIN_SCATTERSIV8DI);
31211 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31212 VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT,
31213 IX86_BUILTIN_SCATTERDIV16SI);
31215 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31216 VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT,
31217 IX86_BUILTIN_SCATTERDIV8DI);
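/* Semantics sketch (editorial, pseudo-C) for the AVX512F scatters
   above: __builtin_ia32_scattersiv16sf (base, mask, idx, val, scale)
   behaves like

     for (int i = 0; i < 16; i++)
       if ((mask >> i) & 1)
         *(float *) ((char *) base + idx[i] * scale) = val[i];

   Unlike the gathers these are registered with def_builtin rather
   than def_builtin_pure, since they write through BASE.  */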
31220 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
31221 V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT,
31222 IX86_BUILTIN_GATHER3SIV2DF);
31224 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
31225 V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT,
31226 IX86_BUILTIN_GATHER3SIV4DF);
31228 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
31229 V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT,
31230 IX86_BUILTIN_GATHER3DIV2DF);
31232 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
31233 V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT,
31234 IX86_BUILTIN_GATHER3DIV4DF);
31236 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
31237 V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT,
31238 IX86_BUILTIN_GATHER3SIV4SF);
31240 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
31241 V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT,
31242 IX86_BUILTIN_GATHER3SIV8SF);
31244 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
31245 V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT,
31246 IX86_BUILTIN_GATHER3DIV4SF);
31248 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
31249 V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT,
31250 IX86_BUILTIN_GATHER3DIV8SF);
31252 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
31253 V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT,
31254 IX86_BUILTIN_GATHER3SIV2DI);
31256 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
31257 V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT,
31258 IX86_BUILTIN_GATHER3SIV4DI);
31260 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
31261 V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT,
31262 IX86_BUILTIN_GATHER3DIV2DI);
31264 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
31265 V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT,
31266 IX86_BUILTIN_GATHER3DIV4DI);
31268 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
31269 V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT,
31270 IX86_BUILTIN_GATHER3SIV4SI);
31272 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
31273 V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT,
31274 IX86_BUILTIN_GATHER3SIV8SI);
31276 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
31277 V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT,
31278 IX86_BUILTIN_GATHER3DIV4SI);
31280 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
31281 V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT,
31282 IX86_BUILTIN_GATHER3DIV8SI);
31284 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
31285 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
31286 IX86_BUILTIN_GATHER3ALTSIV4DF);
31288 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
31289 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
31290 IX86_BUILTIN_GATHER3ALTDIV8SF);
31292 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
31293 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
31294 IX86_BUILTIN_GATHER3ALTSIV4DI);
31296 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
31297 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
31298 IX86_BUILTIN_GATHER3ALTDIV8SI);
31300 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
31301 VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT,
31302 IX86_BUILTIN_SCATTERSIV8SF);
31304 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
31305 VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT,
31306 IX86_BUILTIN_SCATTERSIV4SF);
31308 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
31309 VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT,
31310 IX86_BUILTIN_SCATTERSIV4DF);
31312 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
31313 VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT,
31314 IX86_BUILTIN_SCATTERSIV2DF);
31316 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
31317 VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT,
31318 IX86_BUILTIN_SCATTERDIV8SF);
31320 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
31321 VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT,
31322 IX86_BUILTIN_SCATTERDIV4SF);
31324 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
31325 VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT,
31326 IX86_BUILTIN_SCATTERDIV4DF);
31328 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
31329 VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT,
31330 IX86_BUILTIN_SCATTERDIV2DF);
31332 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
31333 VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT,
31334 IX86_BUILTIN_SCATTERSIV8SI);
31336 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
31337 VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT,
31338 IX86_BUILTIN_SCATTERSIV4SI);
31340 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
31341 VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT,
31342 IX86_BUILTIN_SCATTERSIV4DI);
31344 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
31345 VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT,
31346 IX86_BUILTIN_SCATTERSIV2DI);
31348 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
31349 VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT,
31350 IX86_BUILTIN_SCATTERDIV8SI);
31352 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
31353 VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT,
31354 IX86_BUILTIN_SCATTERDIV4SI);
31356 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
31357 VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT,
31358 IX86_BUILTIN_SCATTERDIV4DI);
31360 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
31361 VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
31362 IX86_BUILTIN_SCATTERDIV2DI);
31364 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
31365 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
31366 IX86_BUILTIN_SCATTERALTSIV8DF);
31368 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv16sf ",
31369 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
31370 IX86_BUILTIN_SCATTERALTDIV16SF);
31372 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
31373 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
31374 IX86_BUILTIN_SCATTERALTSIV8DI);
31376 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv16si ",
31377 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
31378 IX86_BUILTIN_SCATTERALTDIV16SI);
31380 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltsiv4df ",
31381 VOID_FTYPE_PDOUBLE_QI_V8SI_V4DF_INT,
31382 IX86_BUILTIN_SCATTERALTSIV4DF);
31384 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltdiv8sf ",
31385 VOID_FTYPE_PFLOAT_QI_V4DI_V8SF_INT,
31386 IX86_BUILTIN_SCATTERALTDIV8SF);
31388 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltsiv4di ",
31389 VOID_FTYPE_PLONGLONG_QI_V8SI_V4DI_INT,
31390 IX86_BUILTIN_SCATTERALTSIV4DI);
31392 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltdiv8si ",
31393 VOID_FTYPE_PINT_QI_V4DI_V8SI_INT,
31394 IX86_BUILTIN_SCATTERALTDIV8SI);
31396 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltsiv2df ",
31397 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
31398 IX86_BUILTIN_SCATTERALTSIV2DF);
31400 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltdiv4sf ",
31401 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
31402 IX86_BUILTIN_SCATTERALTDIV4SF);
31404 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltsiv2di ",
31405 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
31406 IX86_BUILTIN_SCATTERALTSIV2DI);
31408 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatteraltdiv4si ",
31409 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
31410 IX86_BUILTIN_SCATTERALTDIV4SI);
31413 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31414 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
31415 IX86_BUILTIN_GATHERPFDPD);
31416 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31417 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
31418 IX86_BUILTIN_GATHERPFDPS);
31419 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31420 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31421 IX86_BUILTIN_GATHERPFQPD);
31422 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31423 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31424 IX86_BUILTIN_GATHERPFQPS);
31425 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31426 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
31427 IX86_BUILTIN_SCATTERPFDPD);
31428 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31429 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
31430 IX86_BUILTIN_SCATTERPFDPS);
31431 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31432 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31433 IX86_BUILTIN_SCATTERPFQPD);
31434 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31435 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31436 IX86_BUILTIN_SCATTERPFQPS);
31439 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31440 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31441 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31442 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31443 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31444 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31445 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31446 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31447 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31448 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31449 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31450 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31451 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31452 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31455 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31456 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31458 /* MMX access to the vec_init patterns. */
31459 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31460 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31462 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31463 V4HI_FTYPE_HI_HI_HI_HI,
31464 IX86_BUILTIN_VEC_INIT_V4HI);
31466 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31467 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31468 IX86_BUILTIN_VEC_INIT_V8QI);
31470 /* Access to the vec_extract patterns. */
31471 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31472 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31473 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31474 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31475 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31476 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31477 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31478 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31479 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31480 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31482 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
31483 /* As it uses V4HImode, we have to require -mmmx too. */
31484 | OPTION_MASK_ISA_MMX,
31485 "__builtin_ia32_vec_ext_v4hi",
31486 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31488 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31489 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31491 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31492 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31494 /* Access to the vec_set patterns. */
31495 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31496 "__builtin_ia32_vec_set_v2di",
31497 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31499 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31500 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31502 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31503 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31505 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31506 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31508 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
31509 /* As it uses V4HImode, we have to require -mmmx too. */
31510 | OPTION_MASK_ISA_MMX,
31511 "__builtin_ia32_vec_set_v4hi",
31512 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31514 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31515 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31518 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31519 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31520 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31521 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31522 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31523 "__builtin_ia32_rdseed_di_step",
31524 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31527 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31528 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31529 def_builtin (OPTION_MASK_ISA_64BIT,
31530 "__builtin_ia32_addcarryx_u64",
31531 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31532 IX86_BUILTIN_ADDCARRYX64);
31535 def_builtin (0, "__builtin_ia32_sbb_u32",
31536 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
31537 def_builtin (OPTION_MASK_ISA_64BIT,
31538 "__builtin_ia32_sbb_u64",
31539 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31540 IX86_BUILTIN_SBB64);
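/* Usage sketch (editorial): the carry-chain builtins take a carry-in,
   two operands and an output pointer, and return the carry-out, so a
   64-bit addition can be assembled from two 32-bit steps:

     unsigned long long
     add64 (unsigned int a_lo, unsigned int a_hi,
            unsigned int b_lo, unsigned int b_hi)
     {
       unsigned int lo, hi;
       unsigned char c = __builtin_ia32_addcarryx_u32 (0, a_lo, b_lo, &lo);
       (void) __builtin_ia32_addcarryx_u32 (c, a_hi, b_hi, &hi);
       return ((unsigned long long) hi << 32) | lo;
     }

   __builtin_ia32_sbb_u32 is the borrow-propagating counterpart.  */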
31542 /* Read/write FLAGS. */
31545 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31546 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31547 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31548 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31552 def_builtin (0, "__builtin_ia32_readeflags_u32",
31553 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31554 def_builtin (0, "__builtin_ia32_writeeflags_u32",
31555 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31559 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31560 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31563 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
31564 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
31566 /* MONITORX and MWAITX. */
31567 def_builtin2 (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
31568 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
31569 def_builtin2 (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
31570 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
31573 def_builtin2 (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
31574 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
31577 def_builtin2 (OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umonitor",
31578 VOID_FTYPE_PVOID, IX86_BUILTIN_UMONITOR);
31579 def_builtin2 (OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umwait",
31580 UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_UMWAIT);
31581 def_builtin2 (OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_tpause",
31582 UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_TPAUSE);
31585 def_builtin2 (OPTION_MASK_ISA_CLDEMOTE, "__builtin_ia32_cldemote",
31586 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLDEMOTE);
31588 /* Add FMA4 multi-argument instructions */
31589 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31591 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i);
31595 ftype = (enum ix86_builtin_func_type) d->flag;
31596 def_builtin_const (d->mask, d->name, ftype, d->code);
31598 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST,
31599 IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
31600 ARRAY_SIZE (bdesc_multi_arg) - 1);
31602 /* Add CET intrinsics. */
31603 for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++)
31605 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i);
31609 ftype = (enum ix86_builtin_func_type) d->flag;
31610 def_builtin (d->mask, d->name, ftype, d->code);
31612 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST,
31613 IX86_BUILTIN__BDESC_CET_FIRST,
31614 ARRAY_SIZE (bdesc_cet) - 1);
31616 for (i = 0, d = bdesc_cet_rdssp;
31617 i < ARRAY_SIZE (bdesc_cet_rdssp);
31620 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i);
31624 ftype = (enum ix86_builtin_func_type) d->flag;
31625 def_builtin (d->mask, d->name, ftype, d->code);
31627 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST,
31628 IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
31629 ARRAY_SIZE (bdesc_cet_rdssp) - 1);
31632 #undef BDESC_VERIFY
31633 #undef BDESC_VERIFYS
31635 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31636 to return a pointer to VERSION_DECL if the outcome of the expression
31637 formed by PREDICATE_CHAIN is true. This function will be called during
31638 version dispatch to decide which function version to execute. It returns
31639 the basic block at the end, to which more conditions can be added. */
31642 add_condition_to_bb (tree function_decl, tree version_decl,
31643 tree predicate_chain, basic_block new_bb)
31645 gimple *return_stmt;
31646 tree convert_expr, result_var;
31647 gimple *convert_stmt;
31648 gimple *call_cond_stmt;
31649 gimple *if_else_stmt;
31651 basic_block bb1, bb2, bb3;
31654 tree cond_var, and_expr_var = NULL_TREE;
31657 tree predicate_decl, predicate_arg;
31659 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31661 gcc_assert (new_bb != NULL);
31662 gseq = bb_seq (new_bb);
31665 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31666 build_fold_addr_expr (version_decl));
31667 result_var = create_tmp_var (ptr_type_node);
31668 convert_stmt = gimple_build_assign (result_var, convert_expr);
31669 return_stmt = gimple_build_return (result_var);
31671 if (predicate_chain == NULL_TREE)
31673 gimple_seq_add_stmt (&gseq, convert_stmt);
31674 gimple_seq_add_stmt (&gseq, return_stmt);
31675 set_bb_seq (new_bb, gseq);
31676 gimple_set_bb (convert_stmt, new_bb);
31677 gimple_set_bb (return_stmt, new_bb);
31682 while (predicate_chain != NULL)
31684 cond_var = create_tmp_var (integer_type_node);
31685 predicate_decl = TREE_PURPOSE (predicate_chain);
31686 predicate_arg = TREE_VALUE (predicate_chain);
31687 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31688 gimple_call_set_lhs (call_cond_stmt, cond_var);
31690 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31691 gimple_set_bb (call_cond_stmt, new_bb);
31692 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31694 predicate_chain = TREE_CHAIN (predicate_chain);
31696 if (and_expr_var == NULL)
31697 and_expr_var = cond_var;
31700 gimple *assign_stmt;
31701 /* Use MIN_EXPR to check whether any of the integers is zero:
31702    and_expr_var = min_expr <cond_var, and_expr_var>.  */
31703 assign_stmt = gimple_build_assign (and_expr_var,
31704 build2 (MIN_EXPR, integer_type_node,
31705 cond_var, and_expr_var));
31707 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31708 gimple_set_bb (assign_stmt, new_bb);
31709 gimple_seq_add_stmt (&gseq, assign_stmt);
31713 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31715 NULL_TREE, NULL_TREE);
31716 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31717 gimple_set_bb (if_else_stmt, new_bb);
31718 gimple_seq_add_stmt (&gseq, if_else_stmt);
31720 gimple_seq_add_stmt (&gseq, convert_stmt);
31721 gimple_seq_add_stmt (&gseq, return_stmt);
31722 set_bb_seq (new_bb, gseq);
31725 e12 = split_block (bb1, if_else_stmt);
31727 e12->flags &= ~EDGE_FALLTHRU;
31728 e12->flags |= EDGE_TRUE_VALUE;
31730 e23 = split_block (bb2, return_stmt);
31732 gimple_set_bb (convert_stmt, bb2);
31733 gimple_set_bb (return_stmt, bb2);
31736 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31739 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
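/* Editorial sketch of the code built above for one version, written
   as source-level pseudo-C:

     cond_1 = predicate_1 (arg_1);
     cond_2 = predicate_2 (arg_2);
     and_expr_var = MIN (cond_1, cond_2);
     if (and_expr_var > 0)
       return (void *) &version_decl;
     ... control falls through to the returned basic block, where the
     condition for the next version is appended ...  */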
31746 /* This parses the attribute arguments to target in DECL and determines
31747 the right builtin to use to match the platform specification.
31748 It returns the priority value for this version decl. If PREDICATE_LIST
31749 is not NULL, it stores the list of cpu features that need to be checked
31750 before dispatching this function. */
31752 static unsigned int
31753 get_builtin_code_for_version (tree decl, tree *predicate_list)
31756 struct cl_target_option cur_target;
31758 struct cl_target_option *new_target;
31759 const char *arg_str = NULL;
31760 const char *attrs_str = NULL;
31761 char *tok_str = NULL;
31764 /* Priority of i386 features, greater value is higher priority. This is
31765 used to decide the order in which function dispatch must happen. For
31766 instance, a version specialized for SSE4.2 should be checked for dispatch
31767 before a version for SSE3, as SSE4.2 implies SSE3. */
31768 enum feature_priority
31801 enum feature_priority priority = P_ZERO;
31803 /* These are the target attribute strings for which a dispatcher is
31804 available, from fold_builtin_cpu. */
31806 static struct _feature_list
31808 const char *const name;
31809 const enum feature_priority priority;
31811 const feature_list[] =
31817 {"sse4a", P_SSE4_A},
31818 {"ssse3", P_SSSE3},
31819 {"sse4.1", P_SSE4_1},
31820 {"sse4.2", P_SSE4_2},
31821 {"popcnt", P_POPCNT},
31823 {"pclmul", P_PCLMUL},
31831 {"avx512f", P_AVX512F}
31835 static unsigned int NUM_FEATURES
31836 = sizeof (feature_list) / sizeof (struct _feature_list);
31840 tree predicate_chain = NULL_TREE;
31841 tree predicate_decl, predicate_arg;
31843 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31844 gcc_assert (attrs != NULL);
31846 attrs = TREE_VALUE (TREE_VALUE (attrs));
31848 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31849 attrs_str = TREE_STRING_POINTER (attrs);
31851 /* Return priority zero for the default function. */
31852 if (strcmp (attrs_str, "default") == 0)
31855 /* Handle arch= if specified. For priority, set it to be 1 more than
31856 the best instruction set the processor can handle. For instance, if
31857 there is a version for atom and a version for ssse3 (the highest ISA
31858 priority for atom), the atom version must be checked for dispatch
31859 before the ssse3 version. */
31860 if (strstr (attrs_str, "arch=") != NULL)
31862 cl_target_option_save (&cur_target, &global_options);
31863 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31864 &global_options_set);
31866 gcc_assert (target_node);
31867 if (target_node == error_mark_node)
31869 new_target = TREE_TARGET_OPTION (target_node);
31870 gcc_assert (new_target);
31872 if (new_target->arch_specified && new_target->arch > 0)
31874 switch (new_target->arch)
31876 case PROCESSOR_CORE2:
31878 priority = P_PROC_SSSE3;
31880 case PROCESSOR_NEHALEM:
31881 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31883 arg_str = "westmere";
31888 /* We translate "arch=corei7" and "arch=nehalem" to
31889 "corei7" so that it will be mapped to M_INTEL_COREI7
31890 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31891 arg_str = "corei7";
31892 priority = P_PROC_SSE4_2;
31895 case PROCESSOR_SANDYBRIDGE:
31896 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31897 arg_str = "ivybridge";
31899 arg_str = "sandybridge";
31900 priority = P_PROC_AVX;
31902 case PROCESSOR_HASWELL:
31903 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31904 arg_str = "broadwell";
31906 arg_str = "haswell";
31907 priority = P_PROC_AVX2;
31909 case PROCESSOR_SKYLAKE:
31910 arg_str = "skylake";
31911 priority = P_PROC_AVX2;
31913 case PROCESSOR_SKYLAKE_AVX512:
31914 arg_str = "skylake-avx512";
31915 priority = P_PROC_AVX512F;
31917 case PROCESSOR_CANNONLAKE:
31918 arg_str = "cannonlake";
31919 priority = P_PROC_AVX512F;
31921 case PROCESSOR_ICELAKE_CLIENT:
31922 arg_str = "icelake-client";
31923 priority = P_PROC_AVX512F;
31925 case PROCESSOR_ICELAKE_SERVER:
31926 arg_str = "icelake-server";
31927 priority = P_PROC_AVX512F;
31929 case PROCESSOR_CASCADELAKE:
31930 arg_str = "cascadelake";
31931 priority = P_PROC_AVX512F;
31933 case PROCESSOR_BONNELL:
31934 arg_str = "bonnell";
31935 priority = P_PROC_SSSE3;
31937 case PROCESSOR_KNL:
31939 priority = P_PROC_AVX512F;
31941 case PROCESSOR_KNM:
31943 priority = P_PROC_AVX512F;
31945 case PROCESSOR_SILVERMONT:
31946 arg_str = "silvermont";
31947 priority = P_PROC_SSE4_2;
31949 case PROCESSOR_GOLDMONT:
31950 arg_str = "goldmont";
31951 priority = P_PROC_SSE4_2;
31953 case PROCESSOR_GOLDMONT_PLUS:
31954 arg_str = "goldmont-plus";
31955 priority = P_PROC_SSE4_2;
31957 case PROCESSOR_TREMONT:
31958 arg_str = "tremont";
31959 priority = P_PROC_SSE4_2;
31961 case PROCESSOR_AMDFAM10:
31962 arg_str = "amdfam10h";
31963 priority = P_PROC_SSE4_A;
31965 case PROCESSOR_BTVER1:
31966 arg_str = "btver1";
31967 priority = P_PROC_SSE4_A;
31969 case PROCESSOR_BTVER2:
31970 arg_str = "btver2";
31971 priority = P_PROC_BMI;
31973 case PROCESSOR_BDVER1:
31974 arg_str = "bdver1";
31975 priority = P_PROC_XOP;
31977 case PROCESSOR_BDVER2:
31978 arg_str = "bdver2";
31979 priority = P_PROC_FMA;
31981 case PROCESSOR_BDVER3:
31982 arg_str = "bdver3";
31983 priority = P_PROC_FMA;
31985 case PROCESSOR_BDVER4:
31986 arg_str = "bdver4";
31987 priority = P_PROC_AVX2;
31989 case PROCESSOR_ZNVER1:
31990 arg_str = "znver1";
31991 priority = P_PROC_AVX2;
31993 case PROCESSOR_ZNVER2:
31994 arg_str = "znver2";
31995 priority = P_PROC_AVX2;
32000 cl_target_option_restore (&global_options, &cur_target);
32002 if (predicate_list && arg_str == NULL)
32004 error_at (DECL_SOURCE_LOCATION (decl),
32005 "No dispatcher found for the versioning attributes");
32009 if (predicate_list)
32011 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
32012 /* For a C string literal the length includes the trailing NUL. */
32013 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
32014 predicate_chain = tree_cons (predicate_decl, predicate_arg,
32019 /* Process feature name. */
32020 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
32021 strcpy (tok_str, attrs_str);
32022 token = strtok (tok_str, ",");
32023 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
32025 while (token != NULL)
32027 /* Do not process "arch=".  */
32028 if (strncmp (token, "arch=", 5) == 0)
32030 token = strtok (NULL, ",");
32033 for (i = 0; i < NUM_FEATURES; ++i)
32035 if (strcmp (token, feature_list[i].name) == 0)
32037 if (predicate_list)
32039 predicate_arg = build_string_literal (
32040 strlen (feature_list[i].name) + 1,
32041 feature_list[i].name);
32042 predicate_chain = tree_cons (predicate_decl, predicate_arg,
32045 /* Find the maximum priority feature. */
32046 if (feature_list[i].priority > priority)
32047 priority = feature_list[i].priority;
32052 if (predicate_list && i == NUM_FEATURES)
32054 error_at (DECL_SOURCE_LOCATION (decl),
32055 "No dispatcher found for %s", token);
32058 token = strtok (NULL, ",");
32062 if (predicate_list && predicate_chain == NULL_TREE)
32064 error_at (DECL_SOURCE_LOCATION (decl),
32065 "No dispatcher found for the versioning attributes : %s",
32069 else if (predicate_list)
32071 predicate_chain = nreverse (predicate_chain);
32072 *predicate_list = predicate_chain;
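/* Editorial example of the user-level feature this serves:

     __attribute__ ((target ("default")))      int foo (void);
     __attribute__ ((target ("avx2")))         int foo (void);
     __attribute__ ((target ("arch=haswell"))) int foo (void);

   Here the arch=haswell version receives P_PROC_AVX2, so it is
   checked for dispatch before the plain avx2 version, in line with
   the comment above about arch= priorities.  */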
32078 /* This compares the priority of target features in function DECL1
32079 and DECL2.  It returns a positive value if DECL1 has higher priority,
32080 a negative value if DECL2 has higher priority, and 0 if they are the same.  */
32084 ix86_compare_version_priority (tree decl1, tree decl2)
32086 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
32087 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
32089 return (int)priority1 - (int)priority2;
32092 /* V1 and V2 point to function versions with different priorities
32093 based on the target ISA. This function compares their priorities. */
32096 feature_compare (const void *v1, const void *v2)
32098 typedef struct _function_version_info
32101 tree predicate_chain;
32102 unsigned int dispatch_priority;
32103 } function_version_info;
32105 const function_version_info c1 = *(const function_version_info *)v1;
32106 const function_version_info c2 = *(const function_version_info *)v2;
32107 return (c2.dispatch_priority - c1.dispatch_priority);
32110 /* This function generates the dispatch function for
32111 multi-versioned functions. DISPATCH_DECL is the function which will
32112 contain the dispatch logic. FNDECLS are the function choices for
32113 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
32114 in DISPATCH_DECL in which the dispatch code is generated. */
32117 dispatch_function_versions (tree dispatch_decl,
32119 basic_block *empty_bb)
32122 gimple *ifunc_cpu_init_stmt;
32126 vec<tree> *fndecls;
32127 unsigned int num_versions = 0;
32128 unsigned int actual_versions = 0;
32131 struct _function_version_info
32134 tree predicate_chain;
32135 unsigned int dispatch_priority;
32136 }*function_version_info;
32138 gcc_assert (dispatch_decl != NULL
32139 && fndecls_p != NULL
32140 && empty_bb != NULL);
32142 /* fndecls_p is actually a vector. */
32143 fndecls = static_cast<vec<tree> *> (fndecls_p);
32145 /* At least one more version other than the default. */
32146 num_versions = fndecls->length ();
32147 gcc_assert (num_versions >= 2);
32149 function_version_info = (struct _function_version_info *)
32150 XNEWVEC (struct _function_version_info, (num_versions - 1));
32152 /* The first version in the vector is the default decl. */
32153 default_decl = (*fndecls)[0];
32155 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
32157 gseq = bb_seq (*empty_bb);
32158 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
32159 constructors, so explicitly call __builtin_cpu_init here. */
32160 ifunc_cpu_init_stmt = gimple_build_call_vec (
32161 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
32162 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
32163 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
32164 set_bb_seq (*empty_bb, gseq);
32169 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
32171 tree version_decl = ele;
32172 tree predicate_chain = NULL_TREE;
32173 unsigned int priority;
32174 /* Get attribute string, parse it and find the right predicate decl.
32175 The predicate function could be a lengthy combination of many
32176 features, like arch-type and various isa-variants. */
32177 priority = get_builtin_code_for_version (version_decl,
32180 if (predicate_chain == NULL_TREE)
32183 function_version_info [actual_versions].version_decl = version_decl;
32184 function_version_info [actual_versions].predicate_chain
32186 function_version_info [actual_versions].dispatch_priority = priority;
32190 /* Sort the versions according to descending order of dispatch priority. The
32191 priority is based on the ISA. This is not a perfect solution. There
32192 could still be ambiguity. If more than one function version is suitable
32193 to execute, which one should be dispatched? In future, allow the user
32194 to specify a dispatch priority next to the version. */
32195 qsort (function_version_info, actual_versions,
32196 sizeof (struct _function_version_info), feature_compare);
32198 for (i = 0; i < actual_versions; ++i)
32199 *empty_bb = add_condition_to_bb (dispatch_decl,
32200 function_version_info[i].version_decl,
32201 function_version_info[i].predicate_chain,
32204 /* Dispatch the default version at the end. */
32205 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
32208 free (function_version_info);
32212 /* This function changes the assembler name for functions that are
32213 versions. If DECL is a function version and has a "target"
32214 attribute, it appends the attribute string to its assembler name. */
32217 ix86_mangle_function_version_assembler_name (tree decl, tree id)
32220 const char *orig_name, *version_string;
32221 char *attr_str, *assembler_name;
32223 if (DECL_DECLARED_INLINE_P (decl)
32224 && lookup_attribute ("gnu_inline",
32225 DECL_ATTRIBUTES (decl)))
32226 error_at (DECL_SOURCE_LOCATION (decl),
32227 "Function versions cannot be marked as gnu_inline,"
32228 " bodies have to be generated");
32230 if (DECL_VIRTUAL_P (decl)
32231 || DECL_VINDEX (decl))
32232 sorry ("Virtual function multiversioning not supported");
32234 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32236 /* The target attribute string cannot be NULL. */
32237 gcc_assert (version_attr != NULL_TREE);
32239 orig_name = IDENTIFIER_POINTER (id);
32241 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
32243 if (strcmp (version_string, "default") == 0)
32246 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
32247 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
32249 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
32251 /* Allow assembler name to be modified if already set. */
32252 if (DECL_ASSEMBLER_NAME_SET_P (decl))
32253 SET_DECL_RTL (decl, NULL);
32255 tree ret = get_identifier (assembler_name);
32256 XDELETEVEC (attr_str);
32257 XDELETEVEC (assembler_name);
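/* Editorial example: given

     __attribute__ ((target ("sse4.2"))) int foo (void) { return 1; }

   the sprintf above yields the assembler name "foo.sse4.2", while a
   version whose attribute string is "default" keeps its original
   name.  */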
32263 ix86_mangle_decl_assembler_name (tree decl, tree id)
32265 /* For function version, add the target suffix to the assembler name. */
32266 if (TREE_CODE (decl) == FUNCTION_DECL
32267 && DECL_FUNCTION_VERSIONED (decl))
32268 id = ix86_mangle_function_version_assembler_name (decl, id);
32269 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
32270 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
32276 /* Make a dispatcher declaration for the multi-versioned function DECL.
32277 Calls to DECL function will be replaced with calls to the dispatcher
32278 by the front-end. Returns the decl of the dispatcher function. */
32281 ix86_get_function_versions_dispatcher (void *decl)
32283 tree fn = (tree) decl;
32284 struct cgraph_node *node = NULL;
32285 struct cgraph_node *default_node = NULL;
32286 struct cgraph_function_version_info *node_v = NULL;
32287 struct cgraph_function_version_info *first_v = NULL;
32289 tree dispatch_decl = NULL;
32291 struct cgraph_function_version_info *default_version_info = NULL;
32293 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32295 node = cgraph_node::get (fn);
32296 gcc_assert (node != NULL);
32298 node_v = node->function_version ();
32299 gcc_assert (node_v != NULL);
32301 if (node_v->dispatcher_resolver != NULL)
32302 return node_v->dispatcher_resolver;
32304 /* Find the default version and make it the first node. */
32306 /* Go to the beginning of the chain. */
32307 while (first_v->prev != NULL)
32308 first_v = first_v->prev;
32309 default_version_info = first_v;
32310 while (default_version_info != NULL)
32312 if (is_function_default_version
32313 (default_version_info->this_node->decl))
32315 default_version_info = default_version_info->next;
32318 /* If there is no default node, just return NULL. */
32319 if (default_version_info == NULL)
32322 /* Make default info the first node. */
32323 if (first_v != default_version_info)
32325 default_version_info->prev->next = default_version_info->next;
32326 if (default_version_info->next)
32327 default_version_info->next->prev = default_version_info->prev;
32328 first_v->prev = default_version_info;
32329 default_version_info->next = first_v;
32330 default_version_info->prev = NULL;
32333 default_node = default_version_info->this_node;
32335 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32336 if (targetm.has_ifunc_p ())
32338 struct cgraph_function_version_info *it_v = NULL;
32339 struct cgraph_node *dispatcher_node = NULL;
32340 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32342 /* Right now, the dispatching is done via ifunc. */
32343 dispatch_decl = make_dispatcher_decl (default_node->decl);
32345 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32346 gcc_assert (dispatcher_node != NULL);
32347 dispatcher_node->dispatcher_function = 1;
32348 dispatcher_version_info
32349 = dispatcher_node->insert_new_function_version ();
32350 dispatcher_version_info->next = default_version_info;
32351 dispatcher_node->definition = 1;
32353 /* Set the dispatcher for all the versions. */
32354 it_v = default_version_info;
32355 while (it_v != NULL)
32357 it_v->dispatcher_resolver = dispatch_decl;
32364 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32365 "multiversioning needs ifunc which is not supported "
32369 return dispatch_decl;
32372 /* Make the resolver function decl to dispatch the versions of
32373 a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is
32374 ifunc alias that will point to the created resolver. Create an
32375 empty basic block in the resolver and store the pointer in
32376 EMPTY_BB. Return the decl of the resolver function. */
32379 make_resolver_func (const tree default_decl,
32380 const tree ifunc_alias_decl,
32381 basic_block *empty_bb)
32383 char *resolver_name;
32384 tree decl, type, decl_name, t;
32386 /* IFUNCs have to be globally visible.  So, if the default_decl is
32387 not, then the name of the IFUNC should be made unique. */
32388 if (TREE_PUBLIC (default_decl) == 0)
32390 char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
32391 symtab->change_decl_assembler_name (ifunc_alias_decl,
32392 get_identifier (ifunc_name));
32393 XDELETEVEC (ifunc_name);
32396 resolver_name = make_unique_name (default_decl, "resolver", false);
32398 /* The resolver function should return a (void *). */
32399 type = build_function_type_list (ptr_type_node, NULL_TREE);
32401 decl = build_fn_decl (resolver_name, type);
32402 decl_name = get_identifier (resolver_name);
32403 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32405 DECL_NAME (decl) = decl_name;
32406 TREE_USED (decl) = 1;
32407 DECL_ARTIFICIAL (decl) = 1;
32408 DECL_IGNORED_P (decl) = 1;
32409 TREE_PUBLIC (decl) = 0;
32410 DECL_UNINLINABLE (decl) = 1;
32412 /* Resolver is not external, body is generated. */
32413 DECL_EXTERNAL (decl) = 0;
32414 DECL_EXTERNAL (ifunc_alias_decl) = 0;
32416 DECL_CONTEXT (decl) = NULL_TREE;
32417 DECL_INITIAL (decl) = make_node (BLOCK);
32418 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32420 if (DECL_COMDAT_GROUP (default_decl)
32421 || TREE_PUBLIC (default_decl))
32423 /* In this case, each translation unit with a call to this
32424 versioned function will put out a resolver. Ensure it
32425 is comdat to keep just one copy. */
32426 DECL_COMDAT (decl) = 1;
32427 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32429 /* Build result decl and add to function_decl. */
32430 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32431 DECL_ARTIFICIAL (t) = 1;
32432 DECL_IGNORED_P (t) = 1;
32433 DECL_RESULT (decl) = t;
32435 gimplify_function_tree (decl);
32436 push_cfun (DECL_STRUCT_FUNCTION (decl));
32437 *empty_bb = init_lowered_empty_function (decl, false,
32438 profile_count::uninitialized ());
32440 cgraph_node::add_new_function (decl, true);
32441 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
32445 gcc_assert (ifunc_alias_decl != NULL);
32446 /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */
32447 DECL_ATTRIBUTES (ifunc_alias_decl)
32448 = make_attribute ("ifunc", resolver_name,
32449 DECL_ATTRIBUTES (ifunc_alias_decl));
32451 /* Create the alias for dispatch to resolver here. */
32452 cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
32453 XDELETEVEC (resolver_name);
32457 /* Generate the dispatching code body to dispatch multi-versioned function
32458 DECL. The target hook is called to process the "target" attributes and
32459 provide the code to dispatch the right function at run-time. NODE points
32460 to the dispatcher decl whose body will be created. */
32463 ix86_generate_version_dispatcher_body (void *node_p)
32465 tree resolver_decl;
32466 basic_block empty_bb;
32467 tree default_ver_decl;
32468 struct cgraph_node *versn;
32469 struct cgraph_node *node;
32471 struct cgraph_function_version_info *node_version_info = NULL;
32472 struct cgraph_function_version_info *versn_info = NULL;
32474 node = (cgraph_node *)node_p;
32476 node_version_info = node->function_version ();
32477 gcc_assert (node->dispatcher_function
32478 && node_version_info != NULL);
32480 if (node_version_info->dispatcher_resolver)
32481 return node_version_info->dispatcher_resolver;
32483 /* The first version in the chain corresponds to the default version. */
32484 default_ver_decl = node_version_info->next->this_node->decl;
32486 /* node is going to be an alias, so remove the finalized bit. */
32487 node->definition = false;
32489 resolver_decl = make_resolver_func (default_ver_decl,
32490 node->decl, &empty_bb);
32492 node_version_info->dispatcher_resolver = resolver_decl;
32494 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32496 auto_vec<tree, 2> fn_ver_vec;
32498 for (versn_info = node_version_info->next; versn_info;
32499 versn_info = versn_info->next)
32501 versn = versn_info->this_node;
32502 /* Check for virtual functions here again, as by this time it should
32503 have been determined if this function needs a vtable index or
32504 not. This happens for methods in derived classes that override
32505 virtual methods in base classes but are not explicitly marked as virtual.  */
32507 if (DECL_VINDEX (versn->decl))
32508 sorry ("Virtual function multiversioning not supported");
32510 fn_ver_vec.safe_push (versn->decl);
32513 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32514 cgraph_edge::rebuild_edges ();
32516 return resolver_decl;
32518 /* This builds the processor_model struct type defined in
32519 libgcc/config/i386/cpuinfo.c.  */
32522 build_processor_model_struct (void)
32524 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32526 tree field = NULL_TREE, field_chain = NULL_TREE;
32528 tree type = make_node (RECORD_TYPE);
32530 /* The first 3 fields are unsigned int. */
32531 for (i = 0; i < 3; ++i)
32533 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32534 get_identifier (field_name[i]), unsigned_type_node);
32535 if (field_chain != NULL_TREE)
32536 DECL_CHAIN (field) = field_chain;
32537 field_chain = field;
32540 /* The last field is an array of unsigned integers of size one. */
32541 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32542 get_identifier (field_name[3]),
32543 build_array_type (unsigned_type_node,
32544 build_index_type (size_one_node)));
32545 if (field_chain != NULL_TREE)
32546 DECL_CHAIN (field) = field_chain;
32547 field_chain = field;
32549 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
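/* For reference (editorial), the structure being mirrored, as defined
   in libgcc/config/i386/cpuinfo.c:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */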
32553 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32556 make_var_decl (tree type, const char *name)
32560 new_decl = build_decl (UNKNOWN_LOCATION,
32562 get_identifier(name),
32565 DECL_EXTERNAL (new_decl) = 1;
32566 TREE_STATIC (new_decl) = 1;
32567 TREE_PUBLIC (new_decl) = 1;
32568 DECL_INITIAL (new_decl) = 0;
32569 DECL_ARTIFICIAL (new_decl) = 0;
32570 DECL_PRESERVE_P (new_decl) = 1;
32572 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32573 assemble_variable (new_decl, 0, 0, 0);
32578 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32579 into an integer defined in libgcc/config/i386/cpuinfo.c.  */
32582 fold_builtin_cpu (tree fndecl, tree *args)
32585 enum ix86_builtins fn_code = (enum ix86_builtins)
32586 DECL_FUNCTION_CODE (fndecl);
32587 tree param_string_cst = NULL;
32589 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32590 enum processor_features
32631 /* These are the values for vendor types and cpu types and subtypes
32632 in cpuinfo.c.  CPU types and subtypes are stored with the
32633 corresponding start value subtracted. */
32634 enum processor_model
32644 M_INTEL_SILVERMONT,
32651 M_INTEL_GOLDMONT_PLUS,
32653 M_CPU_SUBTYPE_START,
32654 M_INTEL_COREI7_NEHALEM,
32655 M_INTEL_COREI7_WESTMERE,
32656 M_INTEL_COREI7_SANDYBRIDGE,
32657 M_AMDFAM10H_BARCELONA,
32658 M_AMDFAM10H_SHANGHAI,
32659 M_AMDFAM10H_ISTANBUL,
32660 M_AMDFAM15H_BDVER1,
32661 M_AMDFAM15H_BDVER2,
32662 M_AMDFAM15H_BDVER3,
32663 M_AMDFAM15H_BDVER4,
32664 M_AMDFAM17H_ZNVER1,
32665 M_INTEL_COREI7_IVYBRIDGE,
32666 M_INTEL_COREI7_HASWELL,
32667 M_INTEL_COREI7_BROADWELL,
32668 M_INTEL_COREI7_SKYLAKE,
32669 M_INTEL_COREI7_SKYLAKE_AVX512,
32670 M_INTEL_COREI7_CANNONLAKE,
32671 M_INTEL_COREI7_ICELAKE_CLIENT,
32672 M_INTEL_COREI7_ICELAKE_SERVER,
32673 M_AMDFAM17H_ZNVER2,
32674 M_INTEL_COREI7_CASCADELAKE
32677 static struct _arch_names_table
32679 const char *const name;
32680 const enum processor_model model;
32682 const arch_names_table[] =
32685 {"intel", M_INTEL},
32686 {"atom", M_INTEL_BONNELL},
32687 {"slm", M_INTEL_SILVERMONT},
32688 {"core2", M_INTEL_CORE2},
32689 {"corei7", M_INTEL_COREI7},
32690 {"nehalem", M_INTEL_COREI7_NEHALEM},
32691 {"westmere", M_INTEL_COREI7_WESTMERE},
32692 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32693 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32694 {"haswell", M_INTEL_COREI7_HASWELL},
32695 {"broadwell", M_INTEL_COREI7_BROADWELL},
32696 {"skylake", M_INTEL_COREI7_SKYLAKE},
32697 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
32698 {"cannonlake", M_INTEL_COREI7_CANNONLAKE},
32699 {"icelake-client", M_INTEL_COREI7_ICELAKE_CLIENT},
32700 {"icelake-server", M_INTEL_COREI7_ICELAKE_SERVER},
32701 {"cascadelake", M_INTEL_COREI7_CASCADELAKE},
32702 {"bonnell", M_INTEL_BONNELL},
32703 {"silvermont", M_INTEL_SILVERMONT},
32704 {"goldmont", M_INTEL_GOLDMONT},
32705 {"goldmont-plus", M_INTEL_GOLDMONT_PLUS},
32706 {"tremont", M_INTEL_TREMONT},
32707 {"knl", M_INTEL_KNL},
32708 {"knm", M_INTEL_KNM},
32709 {"amdfam10h", M_AMDFAM10H},
32710 {"barcelona", M_AMDFAM10H_BARCELONA},
32711 {"shanghai", M_AMDFAM10H_SHANGHAI},
32712 {"istanbul", M_AMDFAM10H_ISTANBUL},
32713 {"btver1", M_AMD_BTVER1},
32714 {"amdfam15h", M_AMDFAM15H},
32715 {"bdver1", M_AMDFAM15H_BDVER1},
32716 {"bdver2", M_AMDFAM15H_BDVER2},
32717 {"bdver3", M_AMDFAM15H_BDVER3},
32718 {"bdver4", M_AMDFAM15H_BDVER4},
32719 {"btver2", M_AMD_BTVER2},
32720 {"amdfam17h", M_AMDFAM17H},
32721 {"znver1", M_AMDFAM17H_ZNVER1},
32722 {"znver2", M_AMDFAM17H_ZNVER2},
32725 static struct _isa_names_table
32727 const char *const name;
32728 const enum processor_features feature;
32730 const isa_names_table[] =
32734 {"popcnt", F_POPCNT},
32738 {"ssse3", F_SSSE3},
32739 {"sse4a", F_SSE4_A},
32740 {"sse4.1", F_SSE4_1},
32741 {"sse4.2", F_SSE4_2},
32747 {"avx512f", F_AVX512F},
32751 {"pclmul", F_PCLMUL},
32752 {"avx512vl",F_AVX512VL},
32753 {"avx512bw",F_AVX512BW},
32754 {"avx512dq",F_AVX512DQ},
32755 {"avx512cd",F_AVX512CD},
32756 {"avx512er",F_AVX512ER},
32757 {"avx512pf",F_AVX512PF},
32758 {"avx512vbmi",F_AVX512VBMI},
32759 {"avx512ifma",F_AVX512IFMA},
32760 {"avx5124vnniw",F_AVX5124VNNIW},
32761 {"avx5124fmaps",F_AVX5124FMAPS},
32762 {"avx512vpopcntdq",F_AVX512VPOPCNTDQ},
32763 {"avx512vbmi2", F_AVX512VBMI2},
32765 {"vpclmulqdq", F_VPCLMULQDQ},
32766 {"avx512vnni", F_AVX512VNNI},
32767 {"avx512bitalg", F_AVX512BITALG}
32770 tree __processor_model_type = build_processor_model_struct ();
32771 tree __cpu_model_var = make_var_decl (__processor_model_type,
32775 varpool_node::add (__cpu_model_var);
32777 gcc_assert ((args != NULL) && (*args != NULL));
32779 param_string_cst = *args;
32780 while (param_string_cst
32781 && TREE_CODE (param_string_cst) != STRING_CST)
32783 /* *args must be an expr that can contain other EXPRs leading to a STRING_CST. */
32785 if (!EXPR_P (param_string_cst))
32787 error ("Parameter to builtin must be a string constant or literal");
32788 return integer_zero_node;
32790 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32793 gcc_assert (param_string_cst);
32795 if (fn_code == IX86_BUILTIN_CPU_IS)
32801 unsigned int field_val = 0;
32802 unsigned int NUM_ARCH_NAMES
32803 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32805 for (i = 0; i < NUM_ARCH_NAMES; i++)
32806 if (strcmp (arch_names_table[i].name,
32807 TREE_STRING_POINTER (param_string_cst)) == 0)
32810 if (i == NUM_ARCH_NAMES)
32812 error ("Parameter to builtin not valid: %s",
32813 TREE_STRING_POINTER (param_string_cst));
32814 return integer_zero_node;
32817 field = TYPE_FIELDS (__processor_model_type);
32818 field_val = arch_names_table[i].model;
32820 /* CPU types are stored in the next field. */
32821 if (field_val > M_CPU_TYPE_START
32822 && field_val < M_CPU_SUBTYPE_START)
32824 field = DECL_CHAIN (field);
32825 field_val -= M_CPU_TYPE_START;
32828 /* CPU subtypes are stored in the next field. */
32829 if (field_val > M_CPU_SUBTYPE_START)
field = DECL_CHAIN (DECL_CHAIN (field));
32832 field_val -= M_CPU_SUBTYPE_START;
32835 /* Get the appropriate field in __cpu_model. */
32836 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32839 /* Check the value. */
32840 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32841 build_int_cstu (unsigned_type_node, field_val));
32842 return build1 (CONVERT_EXPR, integer_type_node, final);
32844 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32851 unsigned int field_val = 0;
32852 unsigned int NUM_ISA_NAMES
32853 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32855 for (i = 0; i < NUM_ISA_NAMES; i++)
32856 if (strcmp (isa_names_table[i].name,
32857 TREE_STRING_POINTER (param_string_cst)) == 0)
32860 if (i == NUM_ISA_NAMES)
32862 error ("Parameter to builtin not valid: %s",
32863 TREE_STRING_POINTER (param_string_cst));
32864 return integer_zero_node;
32867 if (isa_names_table[i].feature >= 32)
32869 tree __cpu_features2_var = make_var_decl (unsigned_type_node,
32870 "__cpu_features2");
32872 varpool_node::add (__cpu_features2_var);
32873 field_val = (1U << (isa_names_table[i].feature - 32));
32874 /* Return __cpu_features2 & field_val */
32875 final = build2 (BIT_AND_EXPR, unsigned_type_node,
32876 __cpu_features2_var,
32877 build_int_cstu (unsigned_type_node, field_val));
32878 return build1 (CONVERT_EXPR, integer_type_node, final);
32881 field = TYPE_FIELDS (__processor_model_type);
32882 /* Get the last field, which is __cpu_features. */
32883 while (DECL_CHAIN (field))
32884 field = DECL_CHAIN (field);
32886 /* Get the appropriate field: __cpu_model.__cpu_features */
32887 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32890 /* Access the 0th element of __cpu_features array. */
32891 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32892 integer_zero_node, NULL_TREE, NULL_TREE);
32894 field_val = (1U << isa_names_table[i].feature);
32895 /* Return __cpu_model.__cpu_features[0] & field_val */
32896 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32897 build_int_cstu (unsigned_type_node, field_val));
32898 return build1 (CONVERT_EXPR, integer_type_node, final);
gcc_unreachable ();
}
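/* Illustrative sketch (not from this file): a call such as
   __builtin_cpu_is ("haswell") is folded above into the tree
   equivalent of

     (int) (__cpu_model.__cpu_subtype == INTEL_COREI7_HASWELL)

   where __cpu_model is the libgcc variable filled in by
   __builtin_cpu_init ().  Which field is compared (__cpu_vendor,
   __cpu_type or __cpu_subtype) depends on where the name falls
   relative to M_CPU_TYPE_START and M_CPU_SUBTYPE_START; the exact
   enumerator name on the libgcc side is assumed here.  */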
/* Return the shift count of a vector by scalar shift builtin second
   argument ARG1.  */

static tree
ix86_vector_shift_count (tree arg1)
{
  if (tree_fits_uhwi_p (arg1))
    return arg1;
  else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
    {
      /* The count argument is weird, passed in as various 128-bit
	 (or 64-bit) vectors, the low 64 bits from it are the count.  */
      unsigned char buf[16];
      int len = native_encode_expr (arg1, buf, 16);
      if (len == 0)
	return NULL_TREE;
      tree t = native_interpret_expr (uint64_type_node, buf, len);
      if (t && tree_fits_uhwi_p (t))
	return t;
    }
  return NULL_TREE;
}
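/* Example (illustrative): for _mm_sll_epi32 (v, c) the count operand c
   is a whole V2DI vector, e.g. { 3, 0 }; only its low 64 bits carry
   the shift count, so ix86_vector_shift_count returns 3 for it.  */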
static tree
ix86_fold_builtin (tree fndecl, int n_args,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
32929 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32931 enum ix86_builtins fn_code = (enum ix86_builtins)
32932 DECL_FUNCTION_CODE (fndecl);
32933 enum rtx_code rcode;
32935 unsigned HOST_WIDE_INT mask;
32939 case IX86_BUILTIN_CPU_IS:
32940 case IX86_BUILTIN_CPU_SUPPORTS:
32941 gcc_assert (n_args == 1);
32942 return fold_builtin_cpu (fndecl, args);
32944 case IX86_BUILTIN_NANQ:
32945 case IX86_BUILTIN_NANSQ:
32947 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32948 const char *str = c_getstr (*args);
32949 int quiet = fn_code == IX86_BUILTIN_NANQ;
32950 REAL_VALUE_TYPE real;
32952 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
32953 return build_real (type, real);
32957 case IX86_BUILTIN_INFQ:
32958 case IX86_BUILTIN_HUGE_VALQ:
32960 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32961 REAL_VALUE_TYPE inf;
32963 return build_real (type, inf);
32966 case IX86_BUILTIN_TZCNT16:
32967 case IX86_BUILTIN_CTZS:
32968 case IX86_BUILTIN_TZCNT32:
32969 case IX86_BUILTIN_TZCNT64:
32970 gcc_assert (n_args == 1);
32971 if (TREE_CODE (args[0]) == INTEGER_CST)
32973 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32974 tree arg = args[0];
32975 if (fn_code == IX86_BUILTIN_TZCNT16
32976 || fn_code == IX86_BUILTIN_CTZS)
32977 arg = fold_convert (short_unsigned_type_node, arg);
32978 if (integer_zerop (arg))
32979 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
32981 return fold_const_call (CFN_CTZ, type, arg);
32985 case IX86_BUILTIN_LZCNT16:
32986 case IX86_BUILTIN_CLZS:
32987 case IX86_BUILTIN_LZCNT32:
32988 case IX86_BUILTIN_LZCNT64:
32989 gcc_assert (n_args == 1);
32990 if (TREE_CODE (args[0]) == INTEGER_CST)
32992 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32993 tree arg = args[0];
32994 if (fn_code == IX86_BUILTIN_LZCNT16
32995 || fn_code == IX86_BUILTIN_CLZS)
32996 arg = fold_convert (short_unsigned_type_node, arg);
32997 if (integer_zerop (arg))
32998 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
33000 return fold_const_call (CFN_CLZ, type, arg);
33004 case IX86_BUILTIN_BEXTR32:
33005 case IX86_BUILTIN_BEXTR64:
33006 case IX86_BUILTIN_BEXTRI32:
33007 case IX86_BUILTIN_BEXTRI64:
33008 gcc_assert (n_args == 2);
33009 if (tree_fits_uhwi_p (args[1]))
33011 unsigned HOST_WIDE_INT res = 0;
33012 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
33013 unsigned int start = tree_to_uhwi (args[1]);
33014 unsigned int len = (start & 0xff00) >> 8;
33016 if (start >= prec || len == 0)
33018 else if (!tree_fits_uhwi_p (args[0]))
33021 res = tree_to_uhwi (args[0]) >> start;
33024 if (len < HOST_BITS_PER_WIDE_INT)
33025 res &= (HOST_WIDE_INT_1U << len) - 1;
33026 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33030 case IX86_BUILTIN_BZHI32:
33031 case IX86_BUILTIN_BZHI64:
33032 gcc_assert (n_args == 2);
33033 if (tree_fits_uhwi_p (args[1]))
33035 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
33036 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
33039 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
33040 if (!tree_fits_uhwi_p (args[0]))
33042 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
33043 res &= ~(HOST_WIDE_INT_M1U << idx);
33044 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33048 case IX86_BUILTIN_PDEP32:
33049 case IX86_BUILTIN_PDEP64:
33050 gcc_assert (n_args == 2);
33051 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
33053 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
33054 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
33055 unsigned HOST_WIDE_INT res = 0;
33056 unsigned HOST_WIDE_INT m, k = 1;
33057 for (m = 1; m; m <<= 1)
33058 if ((mask & m) != 0)
33060 if ((src & k) != 0)
33064 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33068 case IX86_BUILTIN_PEXT32:
33069 case IX86_BUILTIN_PEXT64:
33070 gcc_assert (n_args == 2);
33071 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
33073 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
33074 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
33075 unsigned HOST_WIDE_INT res = 0;
33076 unsigned HOST_WIDE_INT m, k = 1;
33077 for (m = 1; m; m <<= 1)
33078 if ((mask & m) != 0)
33080 if ((src & m) != 0)
33084 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
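/* Reference semantics of the PDEP/PEXT folding above, as a plain C
   sketch (illustrative only; it mirrors the constant folding, not the
   machine instruction):

     unsigned long long
     pdep_ref (unsigned long long src, unsigned long long mask)
     {
       unsigned long long res = 0, k = 1, m;
       for (m = 1; m; m <<= 1)
	 if (mask & m)
	   {
	     if (src & k)
	       res |= m;   // deposit next low bit of SRC at bit M
	     k <<= 1;
	   }
       return res;
     }

   PEXT is the inverse: the bits of SRC selected by MASK are packed
   into the low-order bits of the result.  */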
33088 case IX86_BUILTIN_MOVMSKPS:
33089 case IX86_BUILTIN_PMOVMSKB:
33090 case IX86_BUILTIN_MOVMSKPD:
33091 case IX86_BUILTIN_PMOVMSKB128:
33092 case IX86_BUILTIN_MOVMSKPD256:
33093 case IX86_BUILTIN_MOVMSKPS256:
33094 case IX86_BUILTIN_PMOVMSKB256:
33095 gcc_assert (n_args == 1);
33096 if (TREE_CODE (args[0]) == VECTOR_CST)
33098 HOST_WIDE_INT res = 0;
33099 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
33101 tree e = VECTOR_CST_ELT (args[0], i);
33102 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
33104 if (wi::neg_p (wi::to_wide (e)))
33105 res |= HOST_WIDE_INT_1 << i;
33107 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
33109 if (TREE_REAL_CST (e).sign)
33110 res |= HOST_WIDE_INT_1 << i;
33115 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
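/* Example (illustrative): folding __builtin_ia32_movmskps on the
   constant vector { -1.0f, 2.0f, -3.0f, 4.0f } yields 0b0101 = 5,
   one sign bit per element, exactly as the loop above computes.  */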
33119 case IX86_BUILTIN_PSLLD:
33120 case IX86_BUILTIN_PSLLD128:
33121 case IX86_BUILTIN_PSLLD128_MASK:
33122 case IX86_BUILTIN_PSLLD256:
33123 case IX86_BUILTIN_PSLLD256_MASK:
33124 case IX86_BUILTIN_PSLLD512:
33125 case IX86_BUILTIN_PSLLDI:
33126 case IX86_BUILTIN_PSLLDI128:
33127 case IX86_BUILTIN_PSLLDI128_MASK:
33128 case IX86_BUILTIN_PSLLDI256:
33129 case IX86_BUILTIN_PSLLDI256_MASK:
33130 case IX86_BUILTIN_PSLLDI512:
33131 case IX86_BUILTIN_PSLLQ:
33132 case IX86_BUILTIN_PSLLQ128:
33133 case IX86_BUILTIN_PSLLQ128_MASK:
33134 case IX86_BUILTIN_PSLLQ256:
33135 case IX86_BUILTIN_PSLLQ256_MASK:
33136 case IX86_BUILTIN_PSLLQ512:
33137 case IX86_BUILTIN_PSLLQI:
33138 case IX86_BUILTIN_PSLLQI128:
33139 case IX86_BUILTIN_PSLLQI128_MASK:
33140 case IX86_BUILTIN_PSLLQI256:
33141 case IX86_BUILTIN_PSLLQI256_MASK:
33142 case IX86_BUILTIN_PSLLQI512:
33143 case IX86_BUILTIN_PSLLW:
33144 case IX86_BUILTIN_PSLLW128:
33145 case IX86_BUILTIN_PSLLW128_MASK:
33146 case IX86_BUILTIN_PSLLW256:
33147 case IX86_BUILTIN_PSLLW256_MASK:
33148 case IX86_BUILTIN_PSLLW512_MASK:
33149 case IX86_BUILTIN_PSLLWI:
33150 case IX86_BUILTIN_PSLLWI128:
33151 case IX86_BUILTIN_PSLLWI128_MASK:
33152 case IX86_BUILTIN_PSLLWI256:
33153 case IX86_BUILTIN_PSLLWI256_MASK:
33154 case IX86_BUILTIN_PSLLWI512_MASK:
33158 case IX86_BUILTIN_PSRAD:
33159 case IX86_BUILTIN_PSRAD128:
33160 case IX86_BUILTIN_PSRAD128_MASK:
33161 case IX86_BUILTIN_PSRAD256:
33162 case IX86_BUILTIN_PSRAD256_MASK:
33163 case IX86_BUILTIN_PSRAD512:
33164 case IX86_BUILTIN_PSRADI:
33165 case IX86_BUILTIN_PSRADI128:
33166 case IX86_BUILTIN_PSRADI128_MASK:
33167 case IX86_BUILTIN_PSRADI256:
33168 case IX86_BUILTIN_PSRADI256_MASK:
33169 case IX86_BUILTIN_PSRADI512:
33170 case IX86_BUILTIN_PSRAQ128_MASK:
33171 case IX86_BUILTIN_PSRAQ256_MASK:
33172 case IX86_BUILTIN_PSRAQ512:
33173 case IX86_BUILTIN_PSRAQI128_MASK:
33174 case IX86_BUILTIN_PSRAQI256_MASK:
33175 case IX86_BUILTIN_PSRAQI512:
33176 case IX86_BUILTIN_PSRAW:
33177 case IX86_BUILTIN_PSRAW128:
33178 case IX86_BUILTIN_PSRAW128_MASK:
33179 case IX86_BUILTIN_PSRAW256:
33180 case IX86_BUILTIN_PSRAW256_MASK:
33181 case IX86_BUILTIN_PSRAW512:
33182 case IX86_BUILTIN_PSRAWI:
33183 case IX86_BUILTIN_PSRAWI128:
33184 case IX86_BUILTIN_PSRAWI128_MASK:
33185 case IX86_BUILTIN_PSRAWI256:
33186 case IX86_BUILTIN_PSRAWI256_MASK:
33187 case IX86_BUILTIN_PSRAWI512:
33191 case IX86_BUILTIN_PSRLD:
33192 case IX86_BUILTIN_PSRLD128:
33193 case IX86_BUILTIN_PSRLD128_MASK:
33194 case IX86_BUILTIN_PSRLD256:
33195 case IX86_BUILTIN_PSRLD256_MASK:
33196 case IX86_BUILTIN_PSRLD512:
33197 case IX86_BUILTIN_PSRLDI:
33198 case IX86_BUILTIN_PSRLDI128:
33199 case IX86_BUILTIN_PSRLDI128_MASK:
33200 case IX86_BUILTIN_PSRLDI256:
33201 case IX86_BUILTIN_PSRLDI256_MASK:
33202 case IX86_BUILTIN_PSRLDI512:
33203 case IX86_BUILTIN_PSRLQ:
33204 case IX86_BUILTIN_PSRLQ128:
33205 case IX86_BUILTIN_PSRLQ128_MASK:
33206 case IX86_BUILTIN_PSRLQ256:
33207 case IX86_BUILTIN_PSRLQ256_MASK:
33208 case IX86_BUILTIN_PSRLQ512:
33209 case IX86_BUILTIN_PSRLQI:
33210 case IX86_BUILTIN_PSRLQI128:
33211 case IX86_BUILTIN_PSRLQI128_MASK:
33212 case IX86_BUILTIN_PSRLQI256:
33213 case IX86_BUILTIN_PSRLQI256_MASK:
33214 case IX86_BUILTIN_PSRLQI512:
33215 case IX86_BUILTIN_PSRLW:
33216 case IX86_BUILTIN_PSRLW128:
33217 case IX86_BUILTIN_PSRLW128_MASK:
33218 case IX86_BUILTIN_PSRLW256:
33219 case IX86_BUILTIN_PSRLW256_MASK:
33220 case IX86_BUILTIN_PSRLW512:
33221 case IX86_BUILTIN_PSRLWI:
33222 case IX86_BUILTIN_PSRLWI128:
33223 case IX86_BUILTIN_PSRLWI128_MASK:
33224 case IX86_BUILTIN_PSRLWI256:
33225 case IX86_BUILTIN_PSRLWI256_MASK:
33226 case IX86_BUILTIN_PSRLWI512:
33230 case IX86_BUILTIN_PSLLVV16HI:
33231 case IX86_BUILTIN_PSLLVV16SI:
33232 case IX86_BUILTIN_PSLLVV2DI:
33233 case IX86_BUILTIN_PSLLVV2DI_MASK:
33234 case IX86_BUILTIN_PSLLVV32HI:
33235 case IX86_BUILTIN_PSLLVV4DI:
33236 case IX86_BUILTIN_PSLLVV4DI_MASK:
33237 case IX86_BUILTIN_PSLLVV4SI:
33238 case IX86_BUILTIN_PSLLVV4SI_MASK:
33239 case IX86_BUILTIN_PSLLVV8DI:
33240 case IX86_BUILTIN_PSLLVV8HI:
33241 case IX86_BUILTIN_PSLLVV8SI:
33242 case IX86_BUILTIN_PSLLVV8SI_MASK:
33246 case IX86_BUILTIN_PSRAVQ128:
33247 case IX86_BUILTIN_PSRAVQ256:
33248 case IX86_BUILTIN_PSRAVV16HI:
33249 case IX86_BUILTIN_PSRAVV16SI:
33250 case IX86_BUILTIN_PSRAVV32HI:
33251 case IX86_BUILTIN_PSRAVV4SI:
33252 case IX86_BUILTIN_PSRAVV4SI_MASK:
33253 case IX86_BUILTIN_PSRAVV8DI:
33254 case IX86_BUILTIN_PSRAVV8HI:
33255 case IX86_BUILTIN_PSRAVV8SI:
33256 case IX86_BUILTIN_PSRAVV8SI_MASK:
33260 case IX86_BUILTIN_PSRLVV16HI:
33261 case IX86_BUILTIN_PSRLVV16SI:
33262 case IX86_BUILTIN_PSRLVV2DI:
33263 case IX86_BUILTIN_PSRLVV2DI_MASK:
33264 case IX86_BUILTIN_PSRLVV32HI:
33265 case IX86_BUILTIN_PSRLVV4DI:
33266 case IX86_BUILTIN_PSRLVV4DI_MASK:
33267 case IX86_BUILTIN_PSRLVV4SI:
33268 case IX86_BUILTIN_PSRLVV4SI_MASK:
33269 case IX86_BUILTIN_PSRLVV8DI:
33270 case IX86_BUILTIN_PSRLVV8HI:
33271 case IX86_BUILTIN_PSRLVV8SI:
33272 case IX86_BUILTIN_PSRLVV8SI_MASK:
33278 gcc_assert (n_args >= 2);
33279 if (TREE_CODE (args[0]) != VECTOR_CST)
33281 mask = HOST_WIDE_INT_M1U;
/* This is a masked shift.  */
33285 if (!tree_fits_uhwi_p (args[n_args - 1])
33286 || TREE_SIDE_EFFECTS (args[n_args - 2]))
33288 mask = tree_to_uhwi (args[n_args - 1]);
33289 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
33290 mask |= HOST_WIDE_INT_M1U << elems;
33291 if (mask != HOST_WIDE_INT_M1U
33292 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
33294 if (mask == (HOST_WIDE_INT_M1U << elems))
33295 return args[n_args - 2];
33297 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
33299 if (tree tem = (is_vshift ? integer_one_node
33300 : ix86_vector_shift_count (args[1])))
33302 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
33303 unsigned HOST_WIDE_INT prec
33304 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
33305 if (count == 0 && mask == HOST_WIDE_INT_M1U)
33309 if (rcode == ASHIFTRT)
33311 else if (mask == HOST_WIDE_INT_M1U)
33312 return build_zero_cst (TREE_TYPE (args[0]));
33314 tree countt = NULL_TREE;
33318 countt = integer_zero_node;
33320 countt = build_int_cst (integer_type_node, count);
33322 tree_vector_builder builder;
33323 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
33325 unsigned int cnt = builder.encoded_nelts ();
33326 for (unsigned int i = 0; i < cnt; ++i)
33328 tree elt = VECTOR_CST_ELT (args[0], i);
33329 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
33331 tree type = TREE_TYPE (elt);
33332 if (rcode == LSHIFTRT)
33333 elt = fold_convert (unsigned_type_for (type), elt);
33336 countt = VECTOR_CST_ELT (args[1], i);
33337 if (TREE_CODE (countt) != INTEGER_CST
33338 || TREE_OVERFLOW (countt))
33340 if (wi::neg_p (wi::to_wide (countt))
33341 || wi::to_widest (countt) >= prec)
33343 if (rcode == ASHIFTRT)
33344 countt = build_int_cst (TREE_TYPE (countt),
33348 elt = build_zero_cst (TREE_TYPE (elt));
33349 countt = build_zero_cst (TREE_TYPE (countt));
33353 else if (count >= prec)
33354 elt = build_zero_cst (TREE_TYPE (elt));
33355 elt = const_binop (rcode == ASHIFT
33356 ? LSHIFT_EXPR : RSHIFT_EXPR,
33357 TREE_TYPE (elt), elt, countt);
33358 if (!elt || TREE_CODE (elt) != INTEGER_CST)
33360 if (rcode == LSHIFTRT)
33361 elt = fold_convert (type, elt);
33362 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
33364 elt = VECTOR_CST_ELT (args[n_args - 2], i);
33365 if (TREE_CODE (elt) != INTEGER_CST
33366 || TREE_OVERFLOW (elt))
33369 builder.quick_push (elt);
33371 return builder.build ();
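/* Example (illustrative): __builtin_ia32_psllwi128 on a constant
   V8HI vector with count 8 folds here element-wise to a left shift
   by 8; a count of 16 or more yields the zero vector, except for
   arithmetic right shifts, whose count is clamped to prec - 1
   above.  */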
#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
/* Fold an MD builtin in GIMPLE (use ix86_fold_builtin for folding
   into a constant).  */
static bool
ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
33393 gimple *stmt = gsi_stmt (*gsi);
33394 tree fndecl = gimple_call_fndecl (stmt);
33395 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
33396 int n_args = gimple_call_num_args (stmt);
33397 enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
33398 tree decl = NULL_TREE;
33400 enum rtx_code rcode;
33401 unsigned HOST_WIDE_INT count;
33406 case IX86_BUILTIN_TZCNT32:
33407 decl = builtin_decl_implicit (BUILT_IN_CTZ);
33408 goto fold_tzcnt_lzcnt;
33410 case IX86_BUILTIN_TZCNT64:
33411 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
33412 goto fold_tzcnt_lzcnt;
33414 case IX86_BUILTIN_LZCNT32:
33415 decl = builtin_decl_implicit (BUILT_IN_CLZ);
33416 goto fold_tzcnt_lzcnt;
33418 case IX86_BUILTIN_LZCNT64:
33419 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
33420 goto fold_tzcnt_lzcnt;
33423 gcc_assert (n_args == 1);
33424 arg0 = gimple_call_arg (stmt, 0);
33425 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
33427 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
/* If arg0 is provably non-zero, optimize into the generic
   __builtin_c[tl]z{,ll} functions, which the middle-end handles
   better.  */
if (!expr_not_equal_to (arg0, wi::zero (prec)))
  break;
33434 location_t loc = gimple_location (stmt);
33435 gimple *g = gimple_build_call (decl, 1, arg0);
33436 gimple_set_location (g, loc);
33437 tree lhs = make_ssa_name (integer_type_node);
33438 gimple_call_set_lhs (g, lhs);
33439 gsi_insert_before (gsi, g, GSI_SAME_STMT);
33440 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
33441 gimple_set_location (g, loc);
33442 gsi_replace (gsi, g, false);
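/* Example (illustrative): in user code such as

     if (x != 0)
       n = __builtin_ia32_tzcnt_u32 (x);

   x is provably non-zero at the call, so the statement is rewritten
   into __builtin_ctz (x), which the middle-end understands (optab,
   value ranges) far better than the target builtin.  */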
33447 case IX86_BUILTIN_BZHI32:
33448 case IX86_BUILTIN_BZHI64:
33449 gcc_assert (n_args == 2);
33450 arg1 = gimple_call_arg (stmt, 1);
33451 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
33453 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
33454 arg0 = gimple_call_arg (stmt, 0);
33455 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
33457 location_t loc = gimple_location (stmt);
33458 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
33459 gimple_set_location (g, loc);
33460 gsi_replace (gsi, g, false);
33465 case IX86_BUILTIN_PDEP32:
33466 case IX86_BUILTIN_PDEP64:
33467 case IX86_BUILTIN_PEXT32:
33468 case IX86_BUILTIN_PEXT64:
33469 gcc_assert (n_args == 2);
33470 arg1 = gimple_call_arg (stmt, 1);
33471 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
33473 location_t loc = gimple_location (stmt);
33474 arg0 = gimple_call_arg (stmt, 0);
33475 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
33476 gimple_set_location (g, loc);
33477 gsi_replace (gsi, g, false);
33482 case IX86_BUILTIN_PSLLD:
33483 case IX86_BUILTIN_PSLLD128:
33484 case IX86_BUILTIN_PSLLD128_MASK:
33485 case IX86_BUILTIN_PSLLD256:
33486 case IX86_BUILTIN_PSLLD256_MASK:
33487 case IX86_BUILTIN_PSLLD512:
33488 case IX86_BUILTIN_PSLLDI:
33489 case IX86_BUILTIN_PSLLDI128:
33490 case IX86_BUILTIN_PSLLDI128_MASK:
33491 case IX86_BUILTIN_PSLLDI256:
33492 case IX86_BUILTIN_PSLLDI256_MASK:
33493 case IX86_BUILTIN_PSLLDI512:
33494 case IX86_BUILTIN_PSLLQ:
33495 case IX86_BUILTIN_PSLLQ128:
33496 case IX86_BUILTIN_PSLLQ128_MASK:
33497 case IX86_BUILTIN_PSLLQ256:
33498 case IX86_BUILTIN_PSLLQ256_MASK:
33499 case IX86_BUILTIN_PSLLQ512:
33500 case IX86_BUILTIN_PSLLQI:
33501 case IX86_BUILTIN_PSLLQI128:
33502 case IX86_BUILTIN_PSLLQI128_MASK:
33503 case IX86_BUILTIN_PSLLQI256:
33504 case IX86_BUILTIN_PSLLQI256_MASK:
33505 case IX86_BUILTIN_PSLLQI512:
33506 case IX86_BUILTIN_PSLLW:
33507 case IX86_BUILTIN_PSLLW128:
33508 case IX86_BUILTIN_PSLLW128_MASK:
33509 case IX86_BUILTIN_PSLLW256:
33510 case IX86_BUILTIN_PSLLW256_MASK:
33511 case IX86_BUILTIN_PSLLW512_MASK:
33512 case IX86_BUILTIN_PSLLWI:
33513 case IX86_BUILTIN_PSLLWI128:
33514 case IX86_BUILTIN_PSLLWI128_MASK:
33515 case IX86_BUILTIN_PSLLWI256:
33516 case IX86_BUILTIN_PSLLWI256_MASK:
33517 case IX86_BUILTIN_PSLLWI512_MASK:
33521 case IX86_BUILTIN_PSRAD:
33522 case IX86_BUILTIN_PSRAD128:
33523 case IX86_BUILTIN_PSRAD128_MASK:
33524 case IX86_BUILTIN_PSRAD256:
33525 case IX86_BUILTIN_PSRAD256_MASK:
33526 case IX86_BUILTIN_PSRAD512:
33527 case IX86_BUILTIN_PSRADI:
33528 case IX86_BUILTIN_PSRADI128:
33529 case IX86_BUILTIN_PSRADI128_MASK:
33530 case IX86_BUILTIN_PSRADI256:
33531 case IX86_BUILTIN_PSRADI256_MASK:
33532 case IX86_BUILTIN_PSRADI512:
33533 case IX86_BUILTIN_PSRAQ128_MASK:
33534 case IX86_BUILTIN_PSRAQ256_MASK:
33535 case IX86_BUILTIN_PSRAQ512:
33536 case IX86_BUILTIN_PSRAQI128_MASK:
33537 case IX86_BUILTIN_PSRAQI256_MASK:
33538 case IX86_BUILTIN_PSRAQI512:
33539 case IX86_BUILTIN_PSRAW:
33540 case IX86_BUILTIN_PSRAW128:
33541 case IX86_BUILTIN_PSRAW128_MASK:
33542 case IX86_BUILTIN_PSRAW256:
33543 case IX86_BUILTIN_PSRAW256_MASK:
33544 case IX86_BUILTIN_PSRAW512:
33545 case IX86_BUILTIN_PSRAWI:
33546 case IX86_BUILTIN_PSRAWI128:
33547 case IX86_BUILTIN_PSRAWI128_MASK:
33548 case IX86_BUILTIN_PSRAWI256:
33549 case IX86_BUILTIN_PSRAWI256_MASK:
33550 case IX86_BUILTIN_PSRAWI512:
33554 case IX86_BUILTIN_PSRLD:
33555 case IX86_BUILTIN_PSRLD128:
33556 case IX86_BUILTIN_PSRLD128_MASK:
33557 case IX86_BUILTIN_PSRLD256:
33558 case IX86_BUILTIN_PSRLD256_MASK:
33559 case IX86_BUILTIN_PSRLD512:
33560 case IX86_BUILTIN_PSRLDI:
33561 case IX86_BUILTIN_PSRLDI128:
33562 case IX86_BUILTIN_PSRLDI128_MASK:
33563 case IX86_BUILTIN_PSRLDI256:
33564 case IX86_BUILTIN_PSRLDI256_MASK:
33565 case IX86_BUILTIN_PSRLDI512:
33566 case IX86_BUILTIN_PSRLQ:
33567 case IX86_BUILTIN_PSRLQ128:
33568 case IX86_BUILTIN_PSRLQ128_MASK:
33569 case IX86_BUILTIN_PSRLQ256:
33570 case IX86_BUILTIN_PSRLQ256_MASK:
33571 case IX86_BUILTIN_PSRLQ512:
33572 case IX86_BUILTIN_PSRLQI:
33573 case IX86_BUILTIN_PSRLQI128:
33574 case IX86_BUILTIN_PSRLQI128_MASK:
33575 case IX86_BUILTIN_PSRLQI256:
33576 case IX86_BUILTIN_PSRLQI256_MASK:
33577 case IX86_BUILTIN_PSRLQI512:
33578 case IX86_BUILTIN_PSRLW:
33579 case IX86_BUILTIN_PSRLW128:
33580 case IX86_BUILTIN_PSRLW128_MASK:
33581 case IX86_BUILTIN_PSRLW256:
33582 case IX86_BUILTIN_PSRLW256_MASK:
33583 case IX86_BUILTIN_PSRLW512:
33584 case IX86_BUILTIN_PSRLWI:
33585 case IX86_BUILTIN_PSRLWI128:
33586 case IX86_BUILTIN_PSRLWI128_MASK:
33587 case IX86_BUILTIN_PSRLWI256:
33588 case IX86_BUILTIN_PSRLWI256_MASK:
33589 case IX86_BUILTIN_PSRLWI512:
33593 case IX86_BUILTIN_PSLLVV16HI:
33594 case IX86_BUILTIN_PSLLVV16SI:
33595 case IX86_BUILTIN_PSLLVV2DI:
33596 case IX86_BUILTIN_PSLLVV2DI_MASK:
33597 case IX86_BUILTIN_PSLLVV32HI:
33598 case IX86_BUILTIN_PSLLVV4DI:
33599 case IX86_BUILTIN_PSLLVV4DI_MASK:
33600 case IX86_BUILTIN_PSLLVV4SI:
33601 case IX86_BUILTIN_PSLLVV4SI_MASK:
33602 case IX86_BUILTIN_PSLLVV8DI:
33603 case IX86_BUILTIN_PSLLVV8HI:
33604 case IX86_BUILTIN_PSLLVV8SI:
33605 case IX86_BUILTIN_PSLLVV8SI_MASK:
33609 case IX86_BUILTIN_PSRAVQ128:
33610 case IX86_BUILTIN_PSRAVQ256:
33611 case IX86_BUILTIN_PSRAVV16HI:
33612 case IX86_BUILTIN_PSRAVV16SI:
33613 case IX86_BUILTIN_PSRAVV32HI:
33614 case IX86_BUILTIN_PSRAVV4SI:
33615 case IX86_BUILTIN_PSRAVV4SI_MASK:
33616 case IX86_BUILTIN_PSRAVV8DI:
33617 case IX86_BUILTIN_PSRAVV8HI:
33618 case IX86_BUILTIN_PSRAVV8SI:
33619 case IX86_BUILTIN_PSRAVV8SI_MASK:
33623 case IX86_BUILTIN_PSRLVV16HI:
33624 case IX86_BUILTIN_PSRLVV16SI:
33625 case IX86_BUILTIN_PSRLVV2DI:
33626 case IX86_BUILTIN_PSRLVV2DI_MASK:
33627 case IX86_BUILTIN_PSRLVV32HI:
33628 case IX86_BUILTIN_PSRLVV4DI:
33629 case IX86_BUILTIN_PSRLVV4DI_MASK:
33630 case IX86_BUILTIN_PSRLVV4SI:
33631 case IX86_BUILTIN_PSRLVV4SI_MASK:
33632 case IX86_BUILTIN_PSRLVV8DI:
33633 case IX86_BUILTIN_PSRLVV8HI:
33634 case IX86_BUILTIN_PSRLVV8SI:
33635 case IX86_BUILTIN_PSRLVV8SI_MASK:
33641 gcc_assert (n_args >= 2);
33642 arg0 = gimple_call_arg (stmt, 0);
33643 arg1 = gimple_call_arg (stmt, 1);
/* This is a masked shift.  Only optimize if the mask is all ones.  */
33647 tree argl = gimple_call_arg (stmt, n_args - 1);
33648 if (!tree_fits_uhwi_p (argl))
33650 unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
33651 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
33652 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
33657 if (TREE_CODE (arg1) != VECTOR_CST)
33659 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
33660 if (integer_zerop (arg1))
33662 else if (rcode == ASHIFTRT)
33665 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
33667 tree elt = VECTOR_CST_ELT (arg1, i);
33668 if (!wi::neg_p (wi::to_wide (elt))
33669 && wi::to_widest (elt) < count)
33675 arg1 = ix86_vector_shift_count (arg1);
33678 count = tree_to_uhwi (arg1);
33682 /* Just return the first argument for shift by 0. */
33683 location_t loc = gimple_location (stmt);
33684 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
33685 gimple_set_location (g, loc);
33686 gsi_replace (gsi, g, false);
33689 if (rcode != ASHIFTRT
33690 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
/* For shift counts equal to or greater than the precision, the
   result is zero, except for arithmetic right shifts.  */
33694 location_t loc = gimple_location (stmt);
33695 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
33696 build_zero_cst (TREE_TYPE (arg0)));
33697 gimple_set_location (g, loc);
33698 gsi_replace (gsi, g, false);
33710 /* Make builtins to detect cpu type and features supported. NAME is
33711 the builtin name, CODE is the builtin code, and FTYPE is the function
33712 type of the builtin. */
static void
make_cpu_type_builtin (const char* name, int code,
		       enum ix86_builtin_func_type ftype, bool is_const)
{
  tree type;
  tree decl;
33721 type = ix86_get_builtin_func_type (ftype);
33722 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
33724 gcc_assert (decl != NULL_TREE);
33725 ix86_builtins[(int) code] = decl;
33726 TREE_READONLY (decl) = is_const;
/* Make builtins to get CPU type and features supported.  The created
   builtins are:

   __builtin_cpu_init (), to detect cpu type and features,
33733 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
   __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>.  */

static void
ix86_init_platform_type_builtins (void)
{
33740 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
33741 INT_FTYPE_VOID, false);
33742 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
33743 INT_FTYPE_PCCHAR, true);
33744 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
			 INT_FTYPE_PCCHAR, true);
}
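/* Usage sketch (illustrative, user-level code, not part of GCC):

     int
     pick_path (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("haswell"))
	 return 1;
       if (__builtin_cpu_supports ("avx2"))
	 return 2;
       return 0;
     }

   The latter two builtins are registered const above, so repeated
   calls with the same literal argument can be CSEd.  */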
33748 /* Internal method for ix86_init_builtins. */
static void
ix86_init_builtins_va_builtins_abi (void)
{
33753 tree ms_va_ref, sysv_va_ref;
33754 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
33755 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
33756 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

if (!TARGET_64BIT)
  return;

fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
33762 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
33763 ms_va_ref = build_reference_type (ms_va_list_type_node);
33764 sysv_va_ref = build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
fnvoid_va_end_ms = build_function_type_list (void_type_node, ms_va_ref,
					     NULL_TREE);
fnvoid_va_start_ms
  = build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
fnvoid_va_end_sysv
  = build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
fnvoid_va_start_sysv
  = build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
fnvoid_va_copy_ms
  = build_function_type_list (void_type_node, ms_va_ref,
			      ms_va_list_type_node, NULL_TREE);
fnvoid_va_copy_sysv
  = build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);
33782 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
33783 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
33784 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
33785 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
33786 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
33787 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
33788 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
33789 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
33790 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
33791 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
33792 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
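/* Usage sketch (illustrative): these builtins pair with the
   ms_abi/sysv_abi function attributes, e.g.

     void __attribute__ ((ms_abi))
     vlog (const char *fmt, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, fmt);
       ...
       __builtin_ms_va_end (ap);
     }

   so that an ms_abi function can walk its own arguments even when the
   translation unit's default ABI is the SysV one.  */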
static void
ix86_init_builtin_types (void)
{
33799 tree float80_type_node, const_string_type_node;
33801 /* The __float80 type. */
33802 float80_type_node = long_double_type_node;
33803 if (TYPE_MODE (float80_type_node) != XFmode)
33805 if (float64x_type_node != NULL_TREE
33806 && TYPE_MODE (float64x_type_node) == XFmode)
33807 float80_type_node = float64x_type_node;
33810 /* The __float80 type. */
33811 float80_type_node = make_node (REAL_TYPE);
33813 TYPE_PRECISION (float80_type_node) = 80;
33814 layout_type (float80_type_node);
33817 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
/* The __float128 type.  The node has already been created as
   _Float128, so we only need to register the __float128 name for
   it.  */
33822 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
33824 const_string_type_node
33825 = build_pointer_type (build_qualified_type
33826 (char_type_node, TYPE_QUAL_CONST));
33828 /* This macro is built by i386-builtin-types.awk. */
DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
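/* Usage sketch (illustrative): after the registrations above, user
   code can write

     __float80  x = 1.0w;    (XFmode, 80-bit extended precision)
     __float128 y = 1.0q;    (TFmode, IEEE binary128)

   the w/q literal suffixes being the GNU extensions that match these
   two types.  */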
static void
ix86_init_builtins (void)
{
  tree ftype, decl;
33837 ix86_init_builtin_types ();
33839 /* Builtins to get CPU type and features. */
33840 ix86_init_platform_type_builtins ();
33842 /* TFmode support builtins. */
33843 def_builtin_const (0, "__builtin_infq",
33844 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
33845 def_builtin_const (0, "__builtin_huge_valq",
33846 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
33848 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING);
33849 decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ,
33850 BUILT_IN_MD, "nanq", NULL_TREE);
33851 TREE_READONLY (decl) = 1;
33852 ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl;
33854 decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ,
33855 BUILT_IN_MD, "nansq", NULL_TREE);
33856 TREE_READONLY (decl) = 1;
33857 ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl;
33859 /* We will expand them to normal call if SSE isn't available since
33860 they are used by libgcc. */
33861 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
33862 decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ,
33863 BUILT_IN_MD, "__fabstf2", NULL_TREE);
33864 TREE_READONLY (decl) = 1;
33865 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
33867 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
33868 decl = add_builtin_function ("__builtin_copysignq", ftype,
33869 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
33870 "__copysigntf3", NULL_TREE);
33871 TREE_READONLY (decl) = 1;
33872 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
33874 ix86_init_tm_builtins ();
33875 ix86_init_mmx_sse_builtins ();
33878 ix86_init_builtins_va_builtins_abi ();
#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
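/* Usage sketch (illustrative):

     __float128 pinf = __builtin_infq ();
     __float128 qnan = __builtin_nanq ("");
     __float128 ninf = __builtin_copysignq (pinf, -1.0q);

   __builtin_fabsq and __builtin_copysignq expand inline when SSE is
   available and otherwise become calls to the libgcc routines
   __fabstf2 and __copysigntf3 named above.  */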
33885 /* Return the ix86 builtin for CODE. */
static tree
ix86_builtin_decl (unsigned code, bool)
{
33890 if (code >= IX86_BUILTIN_MAX)
33891 return error_mark_node;
return ix86_builtins[code];
}
33896 /* Errors in the source file can cause expand_expr to return const0_rtx
33897 where we expect a vector. To avoid crashing, use one of the vector
33898 clear instructions. */
static rtx
safe_vector_operand (rtx x, machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Fixup modeless constants to fit the required mode.  */

static rtx
fixup_modeless_constant (rtx x, machine_mode mode)
{
  if (GET_MODE (x) == VOIDmode)
    x = convert_to_mode (mode, x, 1);
  return x;
}
33916 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
33922 tree arg0 = CALL_EXPR_ARG (exp, 0);
33923 tree arg1 = CALL_EXPR_ARG (exp, 1);
33924 rtx op0 = expand_normal (arg0);
33925 rtx op1 = expand_normal (arg1);
33926 machine_mode tmode = insn_data[icode].operand[0].mode;
33927 machine_mode mode0 = insn_data[icode].operand[1].mode;
33928 machine_mode mode1 = insn_data[icode].operand[2].mode;
33930 if (VECTOR_MODE_P (mode0))
33931 op0 = safe_vector_operand (op0, mode0);
33932 if (VECTOR_MODE_P (mode1))
33933 op1 = safe_vector_operand (op1, mode1);
33935 if (optimize || !target
33936 || GET_MODE (target) != tmode
33937 || !insn_data[icode].operand[0].predicate (target, tmode))
33938 target = gen_reg_rtx (tmode);
33940 if (GET_MODE (op1) == SImode && mode1 == TImode)
33942 rtx x = gen_reg_rtx (V4SImode);
33943 emit_insn (gen_sse2_loadd (x, op1));
33944 op1 = gen_lowpart (TImode, x);
33947 if (!insn_data[icode].operand[1].predicate (op0, mode0))
33948 op0 = copy_to_mode_reg (mode0, op0);
33949 if (!insn_data[icode].operand[2].predicate (op1, mode1))
33950 op1 = copy_to_mode_reg (mode1, op1);
33952 pat = GEN_FCN (icode) (target, op0, op1);
33961 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
33965 enum ix86_builtin_func_type m_type,
33966 enum rtx_code sub_code)
33971 bool comparison_p = false;
33973 bool last_arg_constant = false;
33974 int num_memory = 0;
33980 machine_mode tmode = insn_data[icode].operand[0].mode;
33984 case MULTI_ARG_4_DF2_DI_I:
33985 case MULTI_ARG_4_DF2_DI_I1:
33986 case MULTI_ARG_4_SF2_SI_I:
33987 case MULTI_ARG_4_SF2_SI_I1:
33989 last_arg_constant = true;
33992 case MULTI_ARG_3_SF:
33993 case MULTI_ARG_3_DF:
33994 case MULTI_ARG_3_SF2:
33995 case MULTI_ARG_3_DF2:
33996 case MULTI_ARG_3_DI:
33997 case MULTI_ARG_3_SI:
33998 case MULTI_ARG_3_SI_DI:
33999 case MULTI_ARG_3_HI:
34000 case MULTI_ARG_3_HI_SI:
34001 case MULTI_ARG_3_QI:
34002 case MULTI_ARG_3_DI2:
34003 case MULTI_ARG_3_SI2:
34004 case MULTI_ARG_3_HI2:
34005 case MULTI_ARG_3_QI2:
34009 case MULTI_ARG_2_SF:
34010 case MULTI_ARG_2_DF:
34011 case MULTI_ARG_2_DI:
34012 case MULTI_ARG_2_SI:
34013 case MULTI_ARG_2_HI:
34014 case MULTI_ARG_2_QI:
34018 case MULTI_ARG_2_DI_IMM:
34019 case MULTI_ARG_2_SI_IMM:
34020 case MULTI_ARG_2_HI_IMM:
34021 case MULTI_ARG_2_QI_IMM:
34023 last_arg_constant = true;
34026 case MULTI_ARG_1_SF:
34027 case MULTI_ARG_1_DF:
34028 case MULTI_ARG_1_SF2:
34029 case MULTI_ARG_1_DF2:
34030 case MULTI_ARG_1_DI:
34031 case MULTI_ARG_1_SI:
34032 case MULTI_ARG_1_HI:
34033 case MULTI_ARG_1_QI:
34034 case MULTI_ARG_1_SI_DI:
34035 case MULTI_ARG_1_HI_DI:
34036 case MULTI_ARG_1_HI_SI:
34037 case MULTI_ARG_1_QI_DI:
34038 case MULTI_ARG_1_QI_SI:
34039 case MULTI_ARG_1_QI_HI:
34043 case MULTI_ARG_2_DI_CMP:
34044 case MULTI_ARG_2_SI_CMP:
34045 case MULTI_ARG_2_HI_CMP:
34046 case MULTI_ARG_2_QI_CMP:
34048 comparison_p = true;
34051 case MULTI_ARG_2_SF_TF:
34052 case MULTI_ARG_2_DF_TF:
34053 case MULTI_ARG_2_DI_TF:
34054 case MULTI_ARG_2_SI_TF:
34055 case MULTI_ARG_2_HI_TF:
34056 case MULTI_ARG_2_QI_TF:
34062 gcc_unreachable ();
34065 if (optimize || !target
34066 || GET_MODE (target) != tmode
34067 || !insn_data[icode].operand[0].predicate (target, tmode))
34068 target = gen_reg_rtx (tmode);
else if (memory_operand (target, tmode))
  num_memory++;
34072 gcc_assert (nargs <= 4);
34074 for (i = 0; i < nargs; i++)
34076 tree arg = CALL_EXPR_ARG (exp, i);
34077 rtx op = expand_normal (arg);
34078 int adjust = (comparison_p) ? 1 : 0;
34079 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
34081 if (last_arg_constant && i == nargs - 1)
34083 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
34085 enum insn_code new_icode = icode;
34088 case CODE_FOR_xop_vpermil2v2df3:
34089 case CODE_FOR_xop_vpermil2v4sf3:
34090 case CODE_FOR_xop_vpermil2v4df3:
34091 case CODE_FOR_xop_vpermil2v8sf3:
34092 error ("the last argument must be a 2-bit immediate");
34093 return gen_reg_rtx (tmode);
34094 case CODE_FOR_xop_rotlv2di3:
34095 new_icode = CODE_FOR_rotlv2di3;
34097 case CODE_FOR_xop_rotlv4si3:
34098 new_icode = CODE_FOR_rotlv4si3;
34100 case CODE_FOR_xop_rotlv8hi3:
34101 new_icode = CODE_FOR_rotlv8hi3;
34103 case CODE_FOR_xop_rotlv16qi3:
34104 new_icode = CODE_FOR_rotlv16qi3;
34106 if (CONST_INT_P (op))
34108 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
34109 op = GEN_INT (INTVAL (op) & mask);
34110 gcc_checking_assert
34111 (insn_data[icode].operand[i + 1].predicate (op, mode));
34115 gcc_checking_assert
34117 && insn_data[new_icode].operand[0].mode == tmode
34118 && insn_data[new_icode].operand[1].mode == tmode
34119 && insn_data[new_icode].operand[2].mode == mode
34120 && insn_data[new_icode].operand[0].predicate
34121 == insn_data[icode].operand[0].predicate
34122 && insn_data[new_icode].operand[1].predicate
34123 == insn_data[icode].operand[1].predicate);
34129 gcc_unreachable ();
34136 if (VECTOR_MODE_P (mode))
34137 op = safe_vector_operand (op, mode);
/* If we aren't optimizing, only allow one memory operand to be
   generated.  */
34141 if (memory_operand (op, mode))
34144 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
34147 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
34149 op = force_reg (mode, op);
34153 args[i].mode = mode;
34159 pat = GEN_FCN (icode) (target, args[0].op);
34164 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34165 GEN_INT ((int)sub_code));
34166 else if (! comparison_p)
34167 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34170 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
34174 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
34179 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34183 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
34187 gcc_unreachable ();
34197 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
34198 insns with vec_merge. */
static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
34205 tree arg0 = CALL_EXPR_ARG (exp, 0);
34206 rtx op1, op0 = expand_normal (arg0);
34207 machine_mode tmode = insn_data[icode].operand[0].mode;
34208 machine_mode mode0 = insn_data[icode].operand[1].mode;
34210 if (optimize || !target
34211 || GET_MODE (target) != tmode
34212 || !insn_data[icode].operand[0].predicate (target, tmode))
34213 target = gen_reg_rtx (tmode);
34215 if (VECTOR_MODE_P (mode0))
34216 op0 = safe_vector_operand (op0, mode0);
34218 if ((optimize && !register_operand (op0, mode0))
34219 || !insn_data[icode].operand[1].predicate (op0, mode0))
34220 op0 = copy_to_mode_reg (mode0, op0);
34223 if (!insn_data[icode].operand[2].predicate (op1, mode0))
34224 op1 = copy_to_mode_reg (mode0, op1);
34226 pat = GEN_FCN (icode) (target, op0, op1);
34233 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
34240 tree arg0 = CALL_EXPR_ARG (exp, 0);
34241 tree arg1 = CALL_EXPR_ARG (exp, 1);
34242 rtx op0 = expand_normal (arg0);
34243 rtx op1 = expand_normal (arg1);
34245 machine_mode tmode = insn_data[d->icode].operand[0].mode;
34246 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
34247 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
34248 enum rtx_code comparison = d->comparison;
34250 if (VECTOR_MODE_P (mode0))
34251 op0 = safe_vector_operand (op0, mode0);
34252 if (VECTOR_MODE_P (mode1))
34253 op1 = safe_vector_operand (op1, mode1);
/* Swap operands if we have a comparison that isn't available in
   hardware.  */
if (swap)
  std::swap (op0, op1);
34260 if (optimize || !target
34261 || GET_MODE (target) != tmode
34262 || !insn_data[d->icode].operand[0].predicate (target, tmode))
34263 target = gen_reg_rtx (tmode);
34265 if ((optimize && !register_operand (op0, mode0))
34266 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
34267 op0 = copy_to_mode_reg (mode0, op0);
34268 if ((optimize && !register_operand (op1, mode1))
34269 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
34270 op1 = copy_to_mode_reg (mode1, op1);
34272 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
34273 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
34280 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
34287 tree arg0 = CALL_EXPR_ARG (exp, 0);
34288 tree arg1 = CALL_EXPR_ARG (exp, 1);
34289 rtx op0 = expand_normal (arg0);
34290 rtx op1 = expand_normal (arg1);
34291 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
34292 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
34293 enum rtx_code comparison = d->comparison;
34295 if (VECTOR_MODE_P (mode0))
34296 op0 = safe_vector_operand (op0, mode0);
34297 if (VECTOR_MODE_P (mode1))
34298 op1 = safe_vector_operand (op1, mode1);
/* Swap operands if we have a comparison that isn't available in
   hardware.  */
34302 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
34303 std::swap (op0, op1);
34305 target = gen_reg_rtx (SImode);
34306 emit_move_insn (target, const0_rtx);
34307 target = gen_rtx_SUBREG (QImode, target, 0);
34309 if ((optimize && !register_operand (op0, mode0))
34310 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34311 op0 = copy_to_mode_reg (mode0, op0);
34312 if ((optimize && !register_operand (op1, mode1))
34313 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
34314 op1 = copy_to_mode_reg (mode1, op1);
34316 pat = GEN_FCN (d->icode) (op0, op1);
34320 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34321 gen_rtx_fmt_ee (comparison, QImode,
return SUBREG_REG (target);
}
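/* Example (illustrative): _mm_comigt_ss (a, b) reaches this expander;
   the comi insn sets the flags register and the code above stores the
   GT condition into the low QImode part of a zeroed SImode pseudo, so
   the builtin's int result is exactly 0 or 1.  */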
34328 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
34335 tree arg0 = CALL_EXPR_ARG (exp, 0);
34336 rtx op1, op0 = expand_normal (arg0);
34337 machine_mode tmode = insn_data[d->icode].operand[0].mode;
34338 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
34340 if (optimize || target == 0
34341 || GET_MODE (target) != tmode
34342 || !insn_data[d->icode].operand[0].predicate (target, tmode))
34343 target = gen_reg_rtx (tmode);
34345 if (VECTOR_MODE_P (mode0))
34346 op0 = safe_vector_operand (op0, mode0);
34348 if ((optimize && !register_operand (op0, mode0))
34349 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34350 op0 = copy_to_mode_reg (mode0, op0);
34352 op1 = GEN_INT (d->comparison);
34354 pat = GEN_FCN (d->icode) (target, op0, op1);
static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
				     tree exp, rtx target)
{
  rtx pat;
34366 tree arg0 = CALL_EXPR_ARG (exp, 0);
34367 tree arg1 = CALL_EXPR_ARG (exp, 1);
34368 rtx op0 = expand_normal (arg0);
34369 rtx op1 = expand_normal (arg1);
34371 machine_mode tmode = insn_data[d->icode].operand[0].mode;
34372 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
34373 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
34375 if (optimize || target == 0
34376 || GET_MODE (target) != tmode
34377 || !insn_data[d->icode].operand[0].predicate (target, tmode))
34378 target = gen_reg_rtx (tmode);
34380 op0 = safe_vector_operand (op0, mode0);
34381 op1 = safe_vector_operand (op1, mode1);
34383 if ((optimize && !register_operand (op0, mode0))
34384 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34385 op0 = copy_to_mode_reg (mode0, op0);
34386 if ((optimize && !register_operand (op1, mode1))
34387 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
34388 op1 = copy_to_mode_reg (mode1, op1);
34390 op2 = GEN_INT (d->comparison);
34392 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
34399 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
34406 tree arg0 = CALL_EXPR_ARG (exp, 0);
34407 tree arg1 = CALL_EXPR_ARG (exp, 1);
34408 rtx op0 = expand_normal (arg0);
34409 rtx op1 = expand_normal (arg1);
34410 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
34411 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
34412 enum rtx_code comparison = d->comparison;
34414 if (VECTOR_MODE_P (mode0))
34415 op0 = safe_vector_operand (op0, mode0);
34416 if (VECTOR_MODE_P (mode1))
34417 op1 = safe_vector_operand (op1, mode1);
34419 target = gen_reg_rtx (SImode);
34420 emit_move_insn (target, const0_rtx);
34421 target = gen_rtx_SUBREG (QImode, target, 0);
34423 if ((optimize && !register_operand (op0, mode0))
34424 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34425 op0 = copy_to_mode_reg (mode0, op0);
34426 if ((optimize && !register_operand (op1, mode1))
34427 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
34428 op1 = copy_to_mode_reg (mode1, op1);
34430 pat = GEN_FCN (d->icode) (op0, op1);
34434 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34435 gen_rtx_fmt_ee (comparison, QImode,
return SUBREG_REG (target);
}
34442 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
34449 tree arg0 = CALL_EXPR_ARG (exp, 0);
34450 tree arg1 = CALL_EXPR_ARG (exp, 1);
34451 tree arg2 = CALL_EXPR_ARG (exp, 2);
34452 tree arg3 = CALL_EXPR_ARG (exp, 3);
34453 tree arg4 = CALL_EXPR_ARG (exp, 4);
34454 rtx scratch0, scratch1;
34455 rtx op0 = expand_normal (arg0);
34456 rtx op1 = expand_normal (arg1);
34457 rtx op2 = expand_normal (arg2);
34458 rtx op3 = expand_normal (arg3);
34459 rtx op4 = expand_normal (arg4);
34460 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
34462 tmode0 = insn_data[d->icode].operand[0].mode;
34463 tmode1 = insn_data[d->icode].operand[1].mode;
34464 modev2 = insn_data[d->icode].operand[2].mode;
34465 modei3 = insn_data[d->icode].operand[3].mode;
34466 modev4 = insn_data[d->icode].operand[4].mode;
34467 modei5 = insn_data[d->icode].operand[5].mode;
34468 modeimm = insn_data[d->icode].operand[6].mode;
34470 if (VECTOR_MODE_P (modev2))
34471 op0 = safe_vector_operand (op0, modev2);
34472 if (VECTOR_MODE_P (modev4))
34473 op2 = safe_vector_operand (op2, modev4);
34475 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
34476 op0 = copy_to_mode_reg (modev2, op0);
34477 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
34478 op1 = copy_to_mode_reg (modei3, op1);
34479 if ((optimize && !register_operand (op2, modev4))
34480 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
34481 op2 = copy_to_mode_reg (modev4, op2);
34482 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
34483 op3 = copy_to_mode_reg (modei5, op3);
34485 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
34487 error ("the fifth argument must be an 8-bit immediate");
34491 if (d->code == IX86_BUILTIN_PCMPESTRI128)
34493 if (optimize || !target
34494 || GET_MODE (target) != tmode0
34495 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
34496 target = gen_reg_rtx (tmode0);
34498 scratch1 = gen_reg_rtx (tmode1);
34500 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
34502 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
34504 if (optimize || !target
34505 || GET_MODE (target) != tmode1
34506 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
34507 target = gen_reg_rtx (tmode1);
34509 scratch0 = gen_reg_rtx (tmode0);
34511 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
34515 gcc_assert (d->flag);
34517 scratch0 = gen_reg_rtx (tmode0);
34518 scratch1 = gen_reg_rtx (tmode1);
34520 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
34530 target = gen_reg_rtx (SImode);
34531 emit_move_insn (target, const0_rtx);
34532 target = gen_rtx_SUBREG (QImode, target, 0);
34535 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34536 gen_rtx_fmt_ee (EQ, QImode,
34537 gen_rtx_REG ((machine_mode) d->flag,
    return SUBREG_REG (target);
  }
else
  return target;
}
34547 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
34554 tree arg0 = CALL_EXPR_ARG (exp, 0);
34555 tree arg1 = CALL_EXPR_ARG (exp, 1);
34556 tree arg2 = CALL_EXPR_ARG (exp, 2);
34557 rtx scratch0, scratch1;
34558 rtx op0 = expand_normal (arg0);
34559 rtx op1 = expand_normal (arg1);
34560 rtx op2 = expand_normal (arg2);
34561 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
34563 tmode0 = insn_data[d->icode].operand[0].mode;
34564 tmode1 = insn_data[d->icode].operand[1].mode;
34565 modev2 = insn_data[d->icode].operand[2].mode;
34566 modev3 = insn_data[d->icode].operand[3].mode;
34567 modeimm = insn_data[d->icode].operand[4].mode;
34569 if (VECTOR_MODE_P (modev2))
34570 op0 = safe_vector_operand (op0, modev2);
34571 if (VECTOR_MODE_P (modev3))
34572 op1 = safe_vector_operand (op1, modev3);
34574 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
34575 op0 = copy_to_mode_reg (modev2, op0);
34576 if ((optimize && !register_operand (op1, modev3))
34577 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
34578 op1 = copy_to_mode_reg (modev3, op1);
34580 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
34582 error ("the third argument must be an 8-bit immediate");
34586 if (d->code == IX86_BUILTIN_PCMPISTRI128)
34588 if (optimize || !target
34589 || GET_MODE (target) != tmode0
34590 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
34591 target = gen_reg_rtx (tmode0);
34593 scratch1 = gen_reg_rtx (tmode1);
34595 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
34597 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
34599 if (optimize || !target
34600 || GET_MODE (target) != tmode1
34601 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
34602 target = gen_reg_rtx (tmode1);
34604 scratch0 = gen_reg_rtx (tmode0);
34606 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
34610 gcc_assert (d->flag);
34612 scratch0 = gen_reg_rtx (tmode0);
34613 scratch1 = gen_reg_rtx (tmode1);
34615 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
34625 target = gen_reg_rtx (SImode);
34626 emit_move_insn (target, const0_rtx);
34627 target = gen_rtx_SUBREG (QImode, target, 0);
34630 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34631 gen_rtx_fmt_ee (EQ, QImode,
34632 gen_rtx_REG ((machine_mode) d->flag,
    return SUBREG_REG (target);
  }
else
  return target;
}
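/* Example (illustrative): _mm_cmpistri (a, b, imm) maps to
   IX86_BUILTIN_PCMPISTRI128 and returns the index computed in ECX,
   _mm_cmpistrm returns the XMM result mask, and the flag variants
   (_mm_cmpistra, _mm_cmpistrc, ...) take the third branch above,
   reading one condition-code bit out of the flags register.  */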
/* Subroutine of ix86_expand_builtin to take care of insns with a
   variable number of operands.  */
static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
34648 rtx pat, real_target;
34649 unsigned int i, nargs;
34650 unsigned int nargs_constant = 0;
34651 unsigned int mask_pos = 0;
34652 int num_memory = 0;
34658 bool second_arg_count = false;
34659 enum insn_code icode = d->icode;
34660 const struct insn_data_d *insn_p = &insn_data[icode];
34661 machine_mode tmode = insn_p->operand[0].mode;
34662 machine_mode rmode = VOIDmode;
34664 enum rtx_code comparison = d->comparison;
34666 switch ((enum ix86_builtin_func_type) d->flag)
34668 case V2DF_FTYPE_V2DF_ROUND:
34669 case V4DF_FTYPE_V4DF_ROUND:
34670 case V8DF_FTYPE_V8DF_ROUND:
34671 case V4SF_FTYPE_V4SF_ROUND:
34672 case V8SF_FTYPE_V8SF_ROUND:
34673 case V16SF_FTYPE_V16SF_ROUND:
34674 case V4SI_FTYPE_V4SF_ROUND:
34675 case V8SI_FTYPE_V8SF_ROUND:
34676 case V16SI_FTYPE_V16SF_ROUND:
34677 return ix86_expand_sse_round (d, exp, target);
34678 case V4SI_FTYPE_V2DF_V2DF_ROUND:
34679 case V8SI_FTYPE_V4DF_V4DF_ROUND:
34680 case V16SI_FTYPE_V8DF_V8DF_ROUND:
34681 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
34682 case INT_FTYPE_V8SF_V8SF_PTEST:
34683 case INT_FTYPE_V4DI_V4DI_PTEST:
34684 case INT_FTYPE_V4DF_V4DF_PTEST:
34685 case INT_FTYPE_V4SF_V4SF_PTEST:
34686 case INT_FTYPE_V2DI_V2DI_PTEST:
34687 case INT_FTYPE_V2DF_V2DF_PTEST:
34688 return ix86_expand_sse_ptest (d, exp, target);
34689 case FLOAT128_FTYPE_FLOAT128:
34690 case FLOAT_FTYPE_FLOAT:
34691 case INT_FTYPE_INT:
34692 case UINT_FTYPE_UINT:
34693 case UINT16_FTYPE_UINT16:
34694 case UINT64_FTYPE_INT:
34695 case UINT64_FTYPE_UINT64:
34696 case INT64_FTYPE_INT64:
34697 case INT64_FTYPE_V4SF:
34698 case INT64_FTYPE_V2DF:
34699 case INT_FTYPE_V16QI:
34700 case INT_FTYPE_V8QI:
34701 case INT_FTYPE_V8SF:
34702 case INT_FTYPE_V4DF:
34703 case INT_FTYPE_V4SF:
34704 case INT_FTYPE_V2DF:
34705 case INT_FTYPE_V32QI:
34706 case V16QI_FTYPE_V16QI:
34707 case V8SI_FTYPE_V8SF:
34708 case V8SI_FTYPE_V4SI:
34709 case V8HI_FTYPE_V8HI:
34710 case V8HI_FTYPE_V16QI:
34711 case V8QI_FTYPE_V8QI:
34712 case V8SF_FTYPE_V8SF:
34713 case V8SF_FTYPE_V8SI:
34714 case V8SF_FTYPE_V4SF:
34715 case V8SF_FTYPE_V8HI:
34716 case V4SI_FTYPE_V4SI:
34717 case V4SI_FTYPE_V16QI:
34718 case V4SI_FTYPE_V4SF:
34719 case V4SI_FTYPE_V8SI:
34720 case V4SI_FTYPE_V8HI:
34721 case V4SI_FTYPE_V4DF:
34722 case V4SI_FTYPE_V2DF:
34723 case V4HI_FTYPE_V4HI:
34724 case V4DF_FTYPE_V4DF:
34725 case V4DF_FTYPE_V4SI:
34726 case V4DF_FTYPE_V4SF:
34727 case V4DF_FTYPE_V2DF:
34728 case V4SF_FTYPE_V4SF:
34729 case V4SF_FTYPE_V4SI:
34730 case V4SF_FTYPE_V8SF:
34731 case V4SF_FTYPE_V4DF:
34732 case V4SF_FTYPE_V8HI:
34733 case V4SF_FTYPE_V2DF:
34734 case V2DI_FTYPE_V2DI:
34735 case V2DI_FTYPE_V16QI:
34736 case V2DI_FTYPE_V8HI:
34737 case V2DI_FTYPE_V4SI:
34738 case V2DF_FTYPE_V2DF:
34739 case V2DF_FTYPE_V4SI:
34740 case V2DF_FTYPE_V4DF:
34741 case V2DF_FTYPE_V4SF:
34742 case V2DF_FTYPE_V2SI:
34743 case V2SI_FTYPE_V2SI:
34744 case V2SI_FTYPE_V4SF:
34745 case V2SI_FTYPE_V2SF:
34746 case V2SI_FTYPE_V2DF:
34747 case V2SF_FTYPE_V2SF:
34748 case V2SF_FTYPE_V2SI:
34749 case V32QI_FTYPE_V32QI:
34750 case V32QI_FTYPE_V16QI:
34751 case V16HI_FTYPE_V16HI:
34752 case V16HI_FTYPE_V8HI:
34753 case V8SI_FTYPE_V8SI:
34754 case V16HI_FTYPE_V16QI:
34755 case V8SI_FTYPE_V16QI:
34756 case V4DI_FTYPE_V16QI:
34757 case V8SI_FTYPE_V8HI:
34758 case V4DI_FTYPE_V8HI:
34759 case V4DI_FTYPE_V4SI:
34760 case V4DI_FTYPE_V2DI:
34761 case UQI_FTYPE_UQI:
34762 case UHI_FTYPE_UHI:
34763 case USI_FTYPE_USI:
34764 case USI_FTYPE_UQI:
34765 case USI_FTYPE_UHI:
34766 case UDI_FTYPE_UDI:
34767 case UHI_FTYPE_V16QI:
34768 case USI_FTYPE_V32QI:
34769 case UDI_FTYPE_V64QI:
34770 case V16QI_FTYPE_UHI:
34771 case V32QI_FTYPE_USI:
34772 case V64QI_FTYPE_UDI:
34773 case V8HI_FTYPE_UQI:
34774 case V16HI_FTYPE_UHI:
34775 case V32HI_FTYPE_USI:
34776 case V4SI_FTYPE_UQI:
34777 case V8SI_FTYPE_UQI:
34778 case V4SI_FTYPE_UHI:
34779 case V8SI_FTYPE_UHI:
34780 case UQI_FTYPE_V8HI:
34781 case UHI_FTYPE_V16HI:
34782 case USI_FTYPE_V32HI:
34783 case UQI_FTYPE_V4SI:
34784 case UQI_FTYPE_V8SI:
34785 case UHI_FTYPE_V16SI:
34786 case UQI_FTYPE_V2DI:
34787 case UQI_FTYPE_V4DI:
34788 case UQI_FTYPE_V8DI:
34789 case V16SI_FTYPE_UHI:
34790 case V2DI_FTYPE_UQI:
34791 case V4DI_FTYPE_UQI:
34792 case V16SI_FTYPE_INT:
34793 case V16SF_FTYPE_V8SF:
34794 case V16SI_FTYPE_V8SI:
34795 case V16SF_FTYPE_V4SF:
34796 case V16SI_FTYPE_V4SI:
34797 case V16SI_FTYPE_V16SF:
34798 case V16SI_FTYPE_V16SI:
34799 case V64QI_FTYPE_V64QI:
34800 case V32HI_FTYPE_V32HI:
34801 case V16SF_FTYPE_V16SF:
34802 case V8DI_FTYPE_UQI:
34803 case V8DI_FTYPE_V8DI:
34804 case V8DF_FTYPE_V4DF:
34805 case V8DF_FTYPE_V2DF:
34806 case V8DF_FTYPE_V8DF:
34807 case V4DI_FTYPE_V4DI:
34810 case V4SF_FTYPE_V4SF_VEC_MERGE:
34811 case V2DF_FTYPE_V2DF_VEC_MERGE:
34812 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
34813 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
34814 case V16QI_FTYPE_V16QI_V16QI:
34815 case V16QI_FTYPE_V8HI_V8HI:
34816 case V16SF_FTYPE_V16SF_V16SF:
34817 case V8QI_FTYPE_V8QI_V8QI:
34818 case V8QI_FTYPE_V4HI_V4HI:
34819 case V8HI_FTYPE_V8HI_V8HI:
34820 case V8HI_FTYPE_V16QI_V16QI:
34821 case V8HI_FTYPE_V4SI_V4SI:
34822 case V8SF_FTYPE_V8SF_V8SF:
34823 case V8SF_FTYPE_V8SF_V8SI:
34824 case V8DF_FTYPE_V8DF_V8DF:
34825 case V4SI_FTYPE_V4SI_V4SI:
34826 case V4SI_FTYPE_V8HI_V8HI:
34827 case V4SI_FTYPE_V2DF_V2DF:
34828 case V4HI_FTYPE_V4HI_V4HI:
34829 case V4HI_FTYPE_V8QI_V8QI:
34830 case V4HI_FTYPE_V2SI_V2SI:
34831 case V4DF_FTYPE_V4DF_V4DF:
34832 case V4DF_FTYPE_V4DF_V4DI:
34833 case V4SF_FTYPE_V4SF_V4SF:
34834 case V4SF_FTYPE_V4SF_V4SI:
34835 case V4SF_FTYPE_V4SF_V2SI:
34836 case V4SF_FTYPE_V4SF_V2DF:
34837 case V4SF_FTYPE_V4SF_UINT:
34838 case V4SF_FTYPE_V4SF_DI:
34839 case V4SF_FTYPE_V4SF_SI:
34840 case V2DI_FTYPE_V2DI_V2DI:
34841 case V2DI_FTYPE_V16QI_V16QI:
34842 case V2DI_FTYPE_V4SI_V4SI:
34843 case V2DI_FTYPE_V2DI_V16QI:
34844 case V2SI_FTYPE_V2SI_V2SI:
34845 case V2SI_FTYPE_V4HI_V4HI:
34846 case V2SI_FTYPE_V2SF_V2SF:
34847 case V2DF_FTYPE_V2DF_V2DF:
34848 case V2DF_FTYPE_V2DF_V4SF:
34849 case V2DF_FTYPE_V2DF_V2DI:
34850 case V2DF_FTYPE_V2DF_DI:
34851 case V2DF_FTYPE_V2DF_SI:
34852 case V2DF_FTYPE_V2DF_UINT:
34853 case V2SF_FTYPE_V2SF_V2SF:
34854 case V1DI_FTYPE_V1DI_V1DI:
34855 case V1DI_FTYPE_V8QI_V8QI:
34856 case V1DI_FTYPE_V2SI_V2SI:
34857 case V32QI_FTYPE_V16HI_V16HI:
34858 case V16HI_FTYPE_V8SI_V8SI:
34859 case V64QI_FTYPE_V64QI_V64QI:
34860 case V32QI_FTYPE_V32QI_V32QI:
34861 case V16HI_FTYPE_V32QI_V32QI:
34862 case V16HI_FTYPE_V16HI_V16HI:
34863 case V8SI_FTYPE_V4DF_V4DF:
34864 case V8SI_FTYPE_V8SI_V8SI:
34865 case V8SI_FTYPE_V16HI_V16HI:
34866 case V4DI_FTYPE_V4DI_V4DI:
34867 case V4DI_FTYPE_V8SI_V8SI:
34868 case V8DI_FTYPE_V64QI_V64QI:
34869 if (comparison == UNKNOWN)
34870 return ix86_expand_binop_builtin (icode, exp, target);
34873 case V4SF_FTYPE_V4SF_V4SF_SWAP:
34874 case V2DF_FTYPE_V2DF_V2DF_SWAP:
34875 gcc_assert (comparison != UNKNOWN);
34879 case V16HI_FTYPE_V16HI_V8HI_COUNT:
34880 case V16HI_FTYPE_V16HI_SI_COUNT:
34881 case V8SI_FTYPE_V8SI_V4SI_COUNT:
34882 case V8SI_FTYPE_V8SI_SI_COUNT:
34883 case V4DI_FTYPE_V4DI_V2DI_COUNT:
34884 case V4DI_FTYPE_V4DI_INT_COUNT:
34885 case V8HI_FTYPE_V8HI_V8HI_COUNT:
34886 case V8HI_FTYPE_V8HI_SI_COUNT:
34887 case V4SI_FTYPE_V4SI_V4SI_COUNT:
34888 case V4SI_FTYPE_V4SI_SI_COUNT:
34889 case V4HI_FTYPE_V4HI_V4HI_COUNT:
34890 case V4HI_FTYPE_V4HI_SI_COUNT:
34891 case V2DI_FTYPE_V2DI_V2DI_COUNT:
34892 case V2DI_FTYPE_V2DI_SI_COUNT:
34893 case V2SI_FTYPE_V2SI_V2SI_COUNT:
34894 case V2SI_FTYPE_V2SI_SI_COUNT:
34895 case V1DI_FTYPE_V1DI_V1DI_COUNT:
34896 case V1DI_FTYPE_V1DI_SI_COUNT:
34898 second_arg_count = true;
34900 case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT:
34901 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT:
34902 case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT:
34903 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT:
34904 case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT:
34905 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT:
34906 case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT:
34907 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT:
34908 case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT:
34909 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT:
34910 case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT:
34911 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT:
34912 case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT:
34913 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT:
34914 case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT:
34915 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT:
34916 case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT:
34917 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT:
34919 second_arg_count = true;
34921 case UINT64_FTYPE_UINT64_UINT64:
34922 case UINT_FTYPE_UINT_UINT:
34923 case UINT_FTYPE_UINT_USHORT:
34924 case UINT_FTYPE_UINT_UCHAR:
34925 case UINT16_FTYPE_UINT16_INT:
34926 case UINT8_FTYPE_UINT8_INT:
34927 case UQI_FTYPE_UQI_UQI:
34928 case UHI_FTYPE_UHI_UHI:
34929 case USI_FTYPE_USI_USI:
34930 case UDI_FTYPE_UDI_UDI:
34931 case V16SI_FTYPE_V8DF_V8DF:
34934 case V2DI_FTYPE_V2DI_INT_CONVERT:
34937 nargs_constant = 1;
34939 case V4DI_FTYPE_V4DI_INT_CONVERT:
34942 nargs_constant = 1;
34944 case V8DI_FTYPE_V8DI_INT_CONVERT:
34947 nargs_constant = 1;
34949 case V8HI_FTYPE_V8HI_INT:
34950 case V8HI_FTYPE_V8SF_INT:
34951 case V16HI_FTYPE_V16SF_INT:
34952 case V8HI_FTYPE_V4SF_INT:
34953 case V8SF_FTYPE_V8SF_INT:
34954 case V4SF_FTYPE_V16SF_INT:
34955 case V16SF_FTYPE_V16SF_INT:
34956 case V4SI_FTYPE_V4SI_INT:
34957 case V4SI_FTYPE_V8SI_INT:
34958 case V4HI_FTYPE_V4HI_INT:
34959 case V4DF_FTYPE_V4DF_INT:
34960 case V4DF_FTYPE_V8DF_INT:
34961 case V4SF_FTYPE_V4SF_INT:
34962 case V4SF_FTYPE_V8SF_INT:
34963 case V2DI_FTYPE_V2DI_INT:
34964 case V2DF_FTYPE_V2DF_INT:
34965 case V2DF_FTYPE_V4DF_INT:
34966 case V16HI_FTYPE_V16HI_INT:
34967 case V8SI_FTYPE_V8SI_INT:
34968 case V16SI_FTYPE_V16SI_INT:
34969 case V4SI_FTYPE_V16SI_INT:
34970 case V4DI_FTYPE_V4DI_INT:
34971 case V2DI_FTYPE_V4DI_INT:
34972 case V4DI_FTYPE_V8DI_INT:
34973 case QI_FTYPE_V4SF_INT:
34974 case QI_FTYPE_V2DF_INT:
34975 case UQI_FTYPE_UQI_UQI_CONST:
34976 case UHI_FTYPE_UHI_UQI:
34977 case USI_FTYPE_USI_UQI:
34978 case UDI_FTYPE_UDI_UQI:
34980 nargs_constant = 1;
34982 case V16QI_FTYPE_V16QI_V16QI_V16QI:
34983 case V8SF_FTYPE_V8SF_V8SF_V8SF:
34984 case V4DF_FTYPE_V4DF_V4DF_V4DF:
34985 case V4SF_FTYPE_V4SF_V4SF_V4SF:
34986 case V2DF_FTYPE_V2DF_V2DF_V2DF:
34987 case V32QI_FTYPE_V32QI_V32QI_V32QI:
34988 case UHI_FTYPE_V16SI_V16SI_UHI:
34989 case UQI_FTYPE_V8DI_V8DI_UQI:
34990 case V16HI_FTYPE_V16SI_V16HI_UHI:
34991 case V16QI_FTYPE_V16SI_V16QI_UHI:
34992 case V16QI_FTYPE_V8DI_V16QI_UQI:
34993 case V16SF_FTYPE_V16SF_V16SF_UHI:
34994 case V16SF_FTYPE_V4SF_V16SF_UHI:
34995 case V16SI_FTYPE_SI_V16SI_UHI:
34996 case V16SI_FTYPE_V16HI_V16SI_UHI:
34997 case V16SI_FTYPE_V16QI_V16SI_UHI:
34998 case V8SF_FTYPE_V4SF_V8SF_UQI:
34999 case V4DF_FTYPE_V2DF_V4DF_UQI:
35000 case V8SI_FTYPE_V4SI_V8SI_UQI:
35001 case V8SI_FTYPE_SI_V8SI_UQI:
35002 case V4SI_FTYPE_V4SI_V4SI_UQI:
35003 case V4SI_FTYPE_SI_V4SI_UQI:
35004 case V4DI_FTYPE_V2DI_V4DI_UQI:
35005 case V4DI_FTYPE_DI_V4DI_UQI:
35006 case V2DI_FTYPE_V2DI_V2DI_UQI:
35007 case V2DI_FTYPE_DI_V2DI_UQI:
35008 case V64QI_FTYPE_V64QI_V64QI_UDI:
35009 case V64QI_FTYPE_V16QI_V64QI_UDI:
35010 case V64QI_FTYPE_QI_V64QI_UDI:
35011 case V32QI_FTYPE_V32QI_V32QI_USI:
35012 case V32QI_FTYPE_V16QI_V32QI_USI:
35013 case V32QI_FTYPE_QI_V32QI_USI:
35014 case V16QI_FTYPE_V16QI_V16QI_UHI:
35015 case V16QI_FTYPE_QI_V16QI_UHI:
35016 case V32HI_FTYPE_V8HI_V32HI_USI:
35017 case V32HI_FTYPE_HI_V32HI_USI:
35018 case V16HI_FTYPE_V8HI_V16HI_UHI:
35019 case V16HI_FTYPE_HI_V16HI_UHI:
35020 case V8HI_FTYPE_V8HI_V8HI_UQI:
35021 case V8HI_FTYPE_HI_V8HI_UQI:
35022 case V8SF_FTYPE_V8HI_V8SF_UQI:
35023 case V4SF_FTYPE_V8HI_V4SF_UQI:
35024 case V8SI_FTYPE_V8SF_V8SI_UQI:
35025 case V4SI_FTYPE_V4SF_V4SI_UQI:
35026 case V4DI_FTYPE_V4SF_V4DI_UQI:
35027 case V2DI_FTYPE_V4SF_V2DI_UQI:
35028 case V4SF_FTYPE_V4DI_V4SF_UQI:
35029 case V4SF_FTYPE_V2DI_V4SF_UQI:
35030 case V4DF_FTYPE_V4DI_V4DF_UQI:
35031 case V2DF_FTYPE_V2DI_V2DF_UQI:
35032 case V16QI_FTYPE_V8HI_V16QI_UQI:
35033 case V16QI_FTYPE_V16HI_V16QI_UHI:
35034 case V16QI_FTYPE_V4SI_V16QI_UQI:
35035 case V16QI_FTYPE_V8SI_V16QI_UQI:
35036 case V8HI_FTYPE_V4SI_V8HI_UQI:
35037 case V8HI_FTYPE_V8SI_V8HI_UQI:
35038 case V16QI_FTYPE_V2DI_V16QI_UQI:
35039 case V16QI_FTYPE_V4DI_V16QI_UQI:
35040 case V8HI_FTYPE_V2DI_V8HI_UQI:
35041 case V8HI_FTYPE_V4DI_V8HI_UQI:
35042 case V4SI_FTYPE_V2DI_V4SI_UQI:
35043 case V4SI_FTYPE_V4DI_V4SI_UQI:
35044 case V32QI_FTYPE_V32HI_V32QI_USI:
35045 case UHI_FTYPE_V16QI_V16QI_UHI:
35046 case USI_FTYPE_V32QI_V32QI_USI:
35047 case UDI_FTYPE_V64QI_V64QI_UDI:
35048 case UQI_FTYPE_V8HI_V8HI_UQI:
35049 case UHI_FTYPE_V16HI_V16HI_UHI:
35050 case USI_FTYPE_V32HI_V32HI_USI:
35051 case UQI_FTYPE_V4SI_V4SI_UQI:
35052 case UQI_FTYPE_V8SI_V8SI_UQI:
35053 case UQI_FTYPE_V2DI_V2DI_UQI:
35054 case UQI_FTYPE_V4DI_V4DI_UQI:
35055 case V4SF_FTYPE_V2DF_V4SF_UQI:
35056 case V4SF_FTYPE_V4DF_V4SF_UQI:
35057 case V16SI_FTYPE_V16SI_V16SI_UHI:
35058 case V16SI_FTYPE_V4SI_V16SI_UHI:
35059 case V2DI_FTYPE_V4SI_V2DI_UQI:
35060 case V2DI_FTYPE_V8HI_V2DI_UQI:
35061 case V2DI_FTYPE_V16QI_V2DI_UQI:
35062 case V4DI_FTYPE_V4DI_V4DI_UQI:
35063 case V4DI_FTYPE_V4SI_V4DI_UQI:
35064 case V4DI_FTYPE_V8HI_V4DI_UQI:
35065 case V4DI_FTYPE_V16QI_V4DI_UQI:
35066 case V4DI_FTYPE_V4DF_V4DI_UQI:
35067 case V2DI_FTYPE_V2DF_V2DI_UQI:
35068 case V4SI_FTYPE_V4DF_V4SI_UQI:
35069 case V4SI_FTYPE_V2DF_V4SI_UQI:
35070 case V4SI_FTYPE_V8HI_V4SI_UQI:
35071 case V4SI_FTYPE_V16QI_V4SI_UQI:
35072 case V4DI_FTYPE_V4DI_V4DI_V4DI:
35073 case V8DF_FTYPE_V2DF_V8DF_UQI:
35074 case V8DF_FTYPE_V4DF_V8DF_UQI:
35075 case V8DF_FTYPE_V8DF_V8DF_UQI:
35076 case V8SF_FTYPE_V8SF_V8SF_UQI:
35077 case V8SF_FTYPE_V8SI_V8SF_UQI:
35078 case V4DF_FTYPE_V4DF_V4DF_UQI:
35079 case V4SF_FTYPE_V4SF_V4SF_UQI:
35080 case V2DF_FTYPE_V2DF_V2DF_UQI:
35081 case V2DF_FTYPE_V4SF_V2DF_UQI:
35082 case V2DF_FTYPE_V4SI_V2DF_UQI:
35083 case V4SF_FTYPE_V4SI_V4SF_UQI:
35084 case V4DF_FTYPE_V4SF_V4DF_UQI:
35085 case V4DF_FTYPE_V4SI_V4DF_UQI:
35086 case V8SI_FTYPE_V8SI_V8SI_UQI:
35087 case V8SI_FTYPE_V8HI_V8SI_UQI:
35088 case V8SI_FTYPE_V16QI_V8SI_UQI:
35089 case V8DF_FTYPE_V8SI_V8DF_UQI:
35090 case V8DI_FTYPE_DI_V8DI_UQI:
35091 case V16SF_FTYPE_V8SF_V16SF_UHI:
35092 case V16SI_FTYPE_V8SI_V16SI_UHI:
35093 case V16HI_FTYPE_V16HI_V16HI_UHI:
35094 case V8HI_FTYPE_V16QI_V8HI_UQI:
35095 case V16HI_FTYPE_V16QI_V16HI_UHI:
35096 case V32HI_FTYPE_V32HI_V32HI_USI:
35097 case V32HI_FTYPE_V32QI_V32HI_USI:
35098 case V8DI_FTYPE_V16QI_V8DI_UQI:
35099 case V8DI_FTYPE_V2DI_V8DI_UQI:
35100 case V8DI_FTYPE_V4DI_V8DI_UQI:
35101 case V8DI_FTYPE_V8DI_V8DI_UQI:
35102 case V8DI_FTYPE_V8HI_V8DI_UQI:
35103 case V8DI_FTYPE_V8SI_V8DI_UQI:
35104 case V8HI_FTYPE_V8DI_V8HI_UQI:
35105 case V8SI_FTYPE_V8DI_V8SI_UQI:
35106 case V4SI_FTYPE_V4SI_V4SI_V4SI:
35107 case V16SI_FTYPE_V16SI_V16SI_V16SI:
35108 case V8DI_FTYPE_V8DI_V8DI_V8DI:
35109 case V32HI_FTYPE_V32HI_V32HI_V32HI:
35110 case V2DI_FTYPE_V2DI_V2DI_V2DI:
35111 case V16HI_FTYPE_V16HI_V16HI_V16HI:
35112 case V8SI_FTYPE_V8SI_V8SI_V8SI:
35113 case V8HI_FTYPE_V8HI_V8HI_V8HI:
35116 case V32QI_FTYPE_V32QI_V32QI_INT:
35117 case V16HI_FTYPE_V16HI_V16HI_INT:
35118 case V16QI_FTYPE_V16QI_V16QI_INT:
35119 case V4DI_FTYPE_V4DI_V4DI_INT:
35120 case V8HI_FTYPE_V8HI_V8HI_INT:
35121 case V8SI_FTYPE_V8SI_V8SI_INT:
35122 case V8SI_FTYPE_V8SI_V4SI_INT:
35123 case V8SF_FTYPE_V8SF_V8SF_INT:
35124 case V8SF_FTYPE_V8SF_V4SF_INT:
35125 case V4SI_FTYPE_V4SI_V4SI_INT:
35126 case V4DF_FTYPE_V4DF_V4DF_INT:
35127 case V16SF_FTYPE_V16SF_V16SF_INT:
35128 case V16SF_FTYPE_V16SF_V4SF_INT:
35129 case V16SI_FTYPE_V16SI_V4SI_INT:
35130 case V4DF_FTYPE_V4DF_V2DF_INT:
35131 case V4SF_FTYPE_V4SF_V4SF_INT:
35132 case V2DI_FTYPE_V2DI_V2DI_INT:
35133 case V4DI_FTYPE_V4DI_V2DI_INT:
35134 case V2DF_FTYPE_V2DF_V2DF_INT:
35135 case UQI_FTYPE_V8DI_V8UDI_INT:
35136 case UQI_FTYPE_V8DF_V8DF_INT:
35137 case UQI_FTYPE_V2DF_V2DF_INT:
35138 case UQI_FTYPE_V4SF_V4SF_INT:
35139 case UHI_FTYPE_V16SI_V16SI_INT:
35140 case UHI_FTYPE_V16SF_V16SF_INT:
35141 case V64QI_FTYPE_V64QI_V64QI_INT:
35142 case V32HI_FTYPE_V32HI_V32HI_INT:
35143 case V16SI_FTYPE_V16SI_V16SI_INT:
35144 case V8DI_FTYPE_V8DI_V8DI_INT:
35145 case V4DF_FTYPE_V4DF_V4DI_INT:
35146 case V8SF_FTYPE_V8SF_V8SI_INT:
35147 case V2DF_FTYPE_V2DF_V2DI_INT:
35148 case V4SF_FTYPE_V4SF_V4SI_INT:
35150 nargs_constant = 1;
35152 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
35155 nargs_constant = 1;
35157 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
35160 nargs_constant = 1;
35162 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
35165 nargs_constant = 1;
35167 case V2DI_FTYPE_V2DI_UINT_UINT:
35169 nargs_constant = 2;
35171 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
35174 nargs_constant = 1;
35176 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
35180 nargs_constant = 1;
35182 case QI_FTYPE_V8DF_INT_UQI:
35183 case QI_FTYPE_V4DF_INT_UQI:
35184 case QI_FTYPE_V2DF_INT_UQI:
35185 case HI_FTYPE_V16SF_INT_UHI:
35186 case QI_FTYPE_V8SF_INT_UQI:
35187 case QI_FTYPE_V4SF_INT_UQI:
35188 case V4SI_FTYPE_V4SI_V4SI_UHI:
35189 case V8SI_FTYPE_V8SI_V8SI_UHI:
35192 nargs_constant = 1;
35194 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
35198 nargs_constant = 1;
35200 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
35204 nargs_constant = 1;
35206 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
35207 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
35208 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
35209 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
35210 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
35211 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
35212 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
35213 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
35214 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
35215 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
35216 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
35217 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
35218 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
35219 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
35220 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
35221 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
35222 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
35223 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
35224 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
35225 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
35226 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
35227 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
35228 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
35229 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
35230 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
35231 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
35232 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
35233 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
35234 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
35235 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
35236 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
35237 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
35238 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
35239 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
35240 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
35241 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
35242 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
35243 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
35244 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
35245 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
35246 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
35247 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
35248 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
35249 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
35250 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
35251 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
35252 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
35253 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
35254 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
35255 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
35256 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
35259 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
35260 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
35261 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
35262 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
35263 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
35265 nargs_constant = 1;
35267 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
35268 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
35269 case QI_FTYPE_V4DF_V4DF_INT_UQI:
35270 case QI_FTYPE_V8SF_V8SF_INT_UQI:
35271 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
35272 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
35273 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
35274 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
35275 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
35276 case USI_FTYPE_V32QI_V32QI_INT_USI:
35277 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
35278 case USI_FTYPE_V32HI_V32HI_INT_USI:
35279 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
35280 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
35281 case V32HI_FTYPE_V32HI_V32HI_V32HI_INT:
35282 case V16HI_FTYPE_V16HI_V16HI_V16HI_INT:
35283 case V8HI_FTYPE_V8HI_V8HI_V8HI_INT:
35284 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT:
35285 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT:
35286 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT:
35287 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT:
35288 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT:
35289 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT:
35292 nargs_constant = 1;
35294 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
35296 nargs_constant = 2;
35298 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
35299 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
35302 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
35303 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
35304 case V4DF_FTYPE_V4DF_V4DI_INT_UQI:
35305 case V8SF_FTYPE_V8SF_V8SI_INT_UQI:
35306 case V2DF_FTYPE_V2DF_V2DI_INT_UQI:
35307 case V4SF_FTYPE_V4SF_V4SI_INT_UQI:
35310 nargs_constant = 1;
35312 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
35313 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
35314 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
35315 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
35316 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
35317 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
35318 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
35319 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
35320 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
35321 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
35322 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
35323 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
35324 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
35325 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
35326 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
35327 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
35328 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
35329 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
35330 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
35331 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
35332 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
35333 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
35334 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
35335 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
35336 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
35337 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
35338 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
35339 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
35340 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
35341 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
35344 nargs_constant = 1;
35346 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
35347 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
35348 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
35349 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
35350 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
35351 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
35352 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
35353 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
35354 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
35355 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
35356 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
35357 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
35358 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
35359 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
35360 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
35361 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
35362 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
35363 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
35364 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
35365 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
35366 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
35367 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
35368 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
35369 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
35370 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
35371 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
35372 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
35373 case V4DF_FTYPE_V4DF_V4DI_INT_V4DF_UQI:
35374 case V8SF_FTYPE_V8SF_V8SI_INT_V8SF_UQI:
35375 case V2DF_FTYPE_V2DF_V2DI_INT_V2DF_UQI:
35376 case V4SF_FTYPE_V4SF_V4SI_INT_V4SF_UQI:
35379 nargs_constant = 1;
35381 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
35382 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
35383 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
35384 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
35385 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
35386 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
35389 nargs_constant = 1;
35391 case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI:
35392 case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI:
35393 case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI:
35394 case V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT:
35395 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT:
35396 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT:
35397 case V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT:
35398 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT:
35399 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT:
35400 case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT:
35401 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT:
35402 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT:
35405 nargs_constant = 2;
35409 gcc_unreachable ();
35412 gcc_assert (nargs <= ARRAY_SIZE (args));
35414 if (comparison != UNKNOWN)
35416 gcc_assert (nargs == 2);
35417 return ix86_expand_sse_compare (d, exp, target, swap);
35420 if (rmode == VOIDmode || rmode == tmode)
35424 || GET_MODE (target) != tmode
35425 || !insn_p->operand[0].predicate (target, tmode))
35426 target = gen_reg_rtx (tmode);
35427 else if (memory_operand (target, tmode))
35429 real_target = target;
35433 real_target = gen_reg_rtx (tmode);
35434 target = lowpart_subreg (rmode, real_target, tmode);
35437 for (i = 0; i < nargs; i++)
35439 tree arg = CALL_EXPR_ARG (exp, i);
35440 rtx op = expand_normal (arg);
35441 machine_mode mode = insn_p->operand[i + 1].mode;
35442 bool match = insn_p->operand[i + 1].predicate (op, mode);
35444 if (second_arg_count && i == 1)
35446 /* SIMD shift insns take either an 8-bit immediate or
35447 a register as the count, but the builtin functions take
35448 an int.  If the count doesn't match, put it in a register.
35449 The instructions use a 64-bit count; if op is only
35450 32-bit, zero-extend it, since negative shift counts are
35451 undefined behavior and zero-extension is more efficient.  */
35455 if (SCALAR_INT_MODE_P (GET_MODE (op)))
35456 op = convert_modes (mode, GET_MODE (op), op, 1);
35458 op = lowpart_subreg (mode, op, GET_MODE (op));
35459 if (!insn_p->operand[i + 1].predicate (op, mode))
35460 op = copy_to_reg (op);
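/* Illustrative example, not part of the original source: for a shift
   builtin of type V8HI_FTYPE_V8HI_SI_COUNT the count arrives as an
   SImode rtx and is widened here to the wider count mode the insn
   pattern expects, roughly
     op = convert_modes (DImode, SImode, op, 1);
   i.e. an unsigned widening.  A negative count would already be
   undefined behavior, so zero extension is safe and cheaper than
   sign extension.  */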
35463 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
35464 || (!mask_pos && (nargs - i) <= nargs_constant))
35469 case CODE_FOR_avx_vinsertf128v4di:
35470 case CODE_FOR_avx_vextractf128v4di:
35471 error ("the last argument must be an 1-bit immediate");
35474 case CODE_FOR_avx512f_cmpv8di3_mask:
35475 case CODE_FOR_avx512f_cmpv16si3_mask:
35476 case CODE_FOR_avx512f_ucmpv8di3_mask:
35477 case CODE_FOR_avx512f_ucmpv16si3_mask:
35478 case CODE_FOR_avx512vl_cmpv4di3_mask:
35479 case CODE_FOR_avx512vl_cmpv8si3_mask:
35480 case CODE_FOR_avx512vl_ucmpv4di3_mask:
35481 case CODE_FOR_avx512vl_ucmpv8si3_mask:
35482 case CODE_FOR_avx512vl_cmpv2di3_mask:
35483 case CODE_FOR_avx512vl_cmpv4si3_mask:
35484 case CODE_FOR_avx512vl_ucmpv2di3_mask:
35485 case CODE_FOR_avx512vl_ucmpv4si3_mask:
35486 error ("the last argument must be a 3-bit immediate");
35489 case CODE_FOR_sse4_1_roundsd:
35490 case CODE_FOR_sse4_1_roundss:
35492 case CODE_FOR_sse4_1_roundpd:
35493 case CODE_FOR_sse4_1_roundps:
35494 case CODE_FOR_avx_roundpd256:
35495 case CODE_FOR_avx_roundps256:
35497 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
35498 case CODE_FOR_sse4_1_roundps_sfix:
35499 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
35500 case CODE_FOR_avx_roundps_sfix256:
35502 case CODE_FOR_sse4_1_blendps:
35503 case CODE_FOR_avx_blendpd256:
35504 case CODE_FOR_avx_vpermilv4df:
35505 case CODE_FOR_avx_vpermilv4df_mask:
35506 case CODE_FOR_avx512f_getmantv8df_mask:
35507 case CODE_FOR_avx512f_getmantv16sf_mask:
35508 case CODE_FOR_avx512vl_getmantv8sf_mask:
35509 case CODE_FOR_avx512vl_getmantv4df_mask:
35510 case CODE_FOR_avx512vl_getmantv4sf_mask:
35511 case CODE_FOR_avx512vl_getmantv2df_mask:
35512 case CODE_FOR_avx512dq_rangepv8df_mask_round:
35513 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
35514 case CODE_FOR_avx512dq_rangepv4df_mask:
35515 case CODE_FOR_avx512dq_rangepv8sf_mask:
35516 case CODE_FOR_avx512dq_rangepv2df_mask:
35517 case CODE_FOR_avx512dq_rangepv4sf_mask:
35518 case CODE_FOR_avx_shufpd256_mask:
35519 error ("the last argument must be a 4-bit immediate");
35522 case CODE_FOR_sha1rnds4:
35523 case CODE_FOR_sse4_1_blendpd:
35524 case CODE_FOR_avx_vpermilv2df:
35525 case CODE_FOR_avx_vpermilv2df_mask:
35526 case CODE_FOR_xop_vpermil2v2df3:
35527 case CODE_FOR_xop_vpermil2v4sf3:
35528 case CODE_FOR_xop_vpermil2v4df3:
35529 case CODE_FOR_xop_vpermil2v8sf3:
35530 case CODE_FOR_avx512f_vinsertf32x4_mask:
35531 case CODE_FOR_avx512f_vinserti32x4_mask:
35532 case CODE_FOR_avx512f_vextractf32x4_mask:
35533 case CODE_FOR_avx512f_vextracti32x4_mask:
35534 case CODE_FOR_sse2_shufpd:
35535 case CODE_FOR_sse2_shufpd_mask:
35536 case CODE_FOR_avx512dq_shuf_f64x2_mask:
35537 case CODE_FOR_avx512dq_shuf_i64x2_mask:
35538 case CODE_FOR_avx512vl_shuf_i32x4_mask:
35539 case CODE_FOR_avx512vl_shuf_f32x4_mask:
35540 error ("the last argument must be a 2-bit immediate");
35543 case CODE_FOR_avx_vextractf128v4df:
35544 case CODE_FOR_avx_vextractf128v8sf:
35545 case CODE_FOR_avx_vextractf128v8si:
35546 case CODE_FOR_avx_vinsertf128v4df:
35547 case CODE_FOR_avx_vinsertf128v8sf:
35548 case CODE_FOR_avx_vinsertf128v8si:
35549 case CODE_FOR_avx512f_vinsertf64x4_mask:
35550 case CODE_FOR_avx512f_vinserti64x4_mask:
35551 case CODE_FOR_avx512f_vextractf64x4_mask:
35552 case CODE_FOR_avx512f_vextracti64x4_mask:
35553 case CODE_FOR_avx512dq_vinsertf32x8_mask:
35554 case CODE_FOR_avx512dq_vinserti32x8_mask:
35555 case CODE_FOR_avx512vl_vinsertv4df:
35556 case CODE_FOR_avx512vl_vinsertv4di:
35557 case CODE_FOR_avx512vl_vinsertv8sf:
35558 case CODE_FOR_avx512vl_vinsertv8si:
35559 error ("the last argument must be a 1-bit immediate");
35562 case CODE_FOR_avx_vmcmpv2df3:
35563 case CODE_FOR_avx_vmcmpv4sf3:
35564 case CODE_FOR_avx_cmpv2df3:
35565 case CODE_FOR_avx_cmpv4sf3:
35566 case CODE_FOR_avx_cmpv4df3:
35567 case CODE_FOR_avx_cmpv8sf3:
35568 case CODE_FOR_avx512f_cmpv8df3_mask:
35569 case CODE_FOR_avx512f_cmpv16sf3_mask:
35570 case CODE_FOR_avx512f_vmcmpv2df3_mask:
35571 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
35572 error ("the last argument must be a 5-bit immediate");
35576 switch (nargs_constant)
35579 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
35580 || (!mask_pos && (nargs - i) == nargs_constant))
35582 error ("the next to last argument must be an 8-bit immediate");
35587 error ("the last argument must be an 8-bit immediate");
35590 gcc_unreachable ();
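/* Illustrative note, not in the original source: e.g. the AVX compare
   builtins that map to CODE_FOR_avx_cmpv4sf3 encode their predicate
   in 5 bits, so a constant outside 0..31 reaches the 5-bit error
   above, while the SSE4.1 round insns accept only a 4-bit rounding
   selector in 0..15.  */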
35597 if (VECTOR_MODE_P (mode))
35598 op = safe_vector_operand (op, mode);
35600 /* If we aren't optimizing, only allow one memory operand to be generated.  */
35602 if (memory_operand (op, mode))
35605 op = fixup_modeless_constant (op, mode);
35607 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35609 if (optimize || !match || num_memory > 1)
35610 op = copy_to_mode_reg (mode, op);
35614 op = copy_to_reg (op);
35615 op = lowpart_subreg (mode, op, GET_MODE (op));
35620 args[i].mode = mode;
35626 pat = GEN_FCN (icode) (real_target, args[0].op);
35629 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
35632 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35636 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35637 args[2].op, args[3].op);
35640 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35641 args[2].op, args[3].op, args[4].op);
35644 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35645 args[2].op, args[3].op, args[4].op,
35649 gcc_unreachable ();
35659 /* Transform a pattern of the layout (set A (unspec [B C] UNSPEC_EMBEDDED_ROUNDING))
35661 into (set A B), i.e. drop the embedded rounding operand C.  */
35667 ix86_erase_embedded_rounding (rtx pat)
35669 if (GET_CODE (pat) == INSN)
35670 pat = PATTERN (pat);
35672 gcc_assert (GET_CODE (pat) == SET);
35673 rtx src = SET_SRC (pat);
35674 gcc_assert (XVECLEN (src, 0) == 2);
35675 rtx p0 = XVECEXP (src, 0, 0);
35676 gcc_assert (GET_CODE (src) == UNSPEC
35677 && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING);
35678 rtx res = gen_rtx_SET (SET_DEST (pat), p0);
35682 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
with rounding.  */
35685 ix86_expand_sse_comi_round (const struct builtin_description *d,
35686 tree exp, rtx target)
35689 tree arg0 = CALL_EXPR_ARG (exp, 0);
35690 tree arg1 = CALL_EXPR_ARG (exp, 1);
35691 tree arg2 = CALL_EXPR_ARG (exp, 2);
35692 tree arg3 = CALL_EXPR_ARG (exp, 3);
35693 rtx op0 = expand_normal (arg0);
35694 rtx op1 = expand_normal (arg1);
35695 rtx op2 = expand_normal (arg2);
35696 rtx op3 = expand_normal (arg3);
35697 enum insn_code icode = d->icode;
35698 const struct insn_data_d *insn_p = &insn_data[icode];
35699 machine_mode mode0 = insn_p->operand[0].mode;
35700 machine_mode mode1 = insn_p->operand[1].mode;
35701 enum rtx_code comparison = UNEQ;
35702 bool need_ucomi = false;
35704 /* See avxintrin.h for values. */
35705 enum rtx_code comi_comparisons[32] =
35707 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
35708 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
35709 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
35711 bool need_ucomi_values[32] =
35713 true, false, false, true, true, false, false, true,
35714 true, false, false, true, true, false, false, true,
35715 false, true, true, false, false, true, true, false,
35716 false, true, true, false, false, true, true, false
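/* Example of the mapping above (indices follow the _CMP_* encoding in
   avxintrin.h; an assumption stated for illustration): index 0
   (_CMP_EQ_OQ) selects UNEQ with the quiet ucomi form, and index 3
   (_CMP_UNORD_Q) selects UNORDERED, also quiet.  */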
35719 if (!CONST_INT_P (op2))
35721 error ("the third argument must be comparison constant");
35724 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
35726 error ("incorrect comparison mode");
35730 if (!insn_p->operand[2].predicate (op3, SImode))
35732 error ("incorrect rounding operand");
35736 comparison = comi_comparisons[INTVAL (op2)];
35737 need_ucomi = need_ucomi_values[INTVAL (op2)];
35739 if (VECTOR_MODE_P (mode0))
35740 op0 = safe_vector_operand (op0, mode0);
35741 if (VECTOR_MODE_P (mode1))
35742 op1 = safe_vector_operand (op1, mode1);
35744 target = gen_reg_rtx (SImode);
35745 emit_move_insn (target, const0_rtx);
35746 target = gen_rtx_SUBREG (QImode, target, 0);
35748 if ((optimize && !register_operand (op0, mode0))
35749 || !insn_p->operand[0].predicate (op0, mode0))
35750 op0 = copy_to_mode_reg (mode0, op0);
35751 if ((optimize && !register_operand (op1, mode1))
35752 || !insn_p->operand[1].predicate (op1, mode1))
35753 op1 = copy_to_mode_reg (mode1, op1);
35756 icode = icode == CODE_FOR_sse_comi_round
35757 ? CODE_FOR_sse_ucomi_round
35758 : CODE_FOR_sse2_ucomi_round;
35760 pat = GEN_FCN (icode) (op0, op1, op3);
35764 /* The rounding operand can be either NO_ROUND or ROUND_SAE at this point.  */
35765 if (INTVAL (op3) == NO_ROUND)
35767 pat = ix86_erase_embedded_rounding (pat);
35771 set_dst = SET_DEST (pat);
35775 gcc_assert (GET_CODE (pat) == SET);
35776 set_dst = SET_DEST (pat);
35780 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35781 gen_rtx_fmt_ee (comparison, QImode,
35785 return SUBREG_REG (target);
35789 ix86_expand_round_builtin (const struct builtin_description *d,
35790 tree exp, rtx target)
35793 unsigned int i, nargs;
35799 enum insn_code icode = d->icode;
35800 const struct insn_data_d *insn_p = &insn_data[icode];
35801 machine_mode tmode = insn_p->operand[0].mode;
35802 unsigned int nargs_constant = 0;
35803 unsigned int redundant_embed_rnd = 0;
35805 switch ((enum ix86_builtin_func_type) d->flag)
35807 case UINT64_FTYPE_V2DF_INT:
35808 case UINT64_FTYPE_V4SF_INT:
35809 case UINT_FTYPE_V2DF_INT:
35810 case UINT_FTYPE_V4SF_INT:
35811 case INT64_FTYPE_V2DF_INT:
35812 case INT64_FTYPE_V4SF_INT:
35813 case INT_FTYPE_V2DF_INT:
35814 case INT_FTYPE_V4SF_INT:
35817 case V4SF_FTYPE_V4SF_UINT_INT:
35818 case V4SF_FTYPE_V4SF_UINT64_INT:
35819 case V2DF_FTYPE_V2DF_UINT64_INT:
35820 case V4SF_FTYPE_V4SF_INT_INT:
35821 case V4SF_FTYPE_V4SF_INT64_INT:
35822 case V2DF_FTYPE_V2DF_INT64_INT:
35823 case V4SF_FTYPE_V4SF_V4SF_INT:
35824 case V2DF_FTYPE_V2DF_V2DF_INT:
35825 case V4SF_FTYPE_V4SF_V2DF_INT:
35826 case V2DF_FTYPE_V2DF_V4SF_INT:
35829 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
35830 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
35831 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
35832 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
35833 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
35834 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
35835 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
35836 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
35837 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
35838 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
35839 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
35840 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
35841 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
35842 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
35845 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
35846 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
35847 case V8DF_FTYPE_V8DF_V8DI_INT_INT:
35848 case V16SF_FTYPE_V16SF_V16SI_INT_INT:
35849 case V2DF_FTYPE_V2DF_V2DI_INT_INT:
35850 case V4SF_FTYPE_V4SF_V4SI_INT_INT:
35851 nargs_constant = 2;
35854 case INT_FTYPE_V4SF_V4SF_INT_INT:
35855 case INT_FTYPE_V2DF_V2DF_INT_INT:
35856 return ix86_expand_sse_comi_round (d, exp, target);
35857 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
35858 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
35859 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
35860 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
35861 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
35862 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
35863 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
35864 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
35867 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
35868 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
35869 nargs_constant = 4;
35872 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
35873 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
35874 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
35875 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
35876 case V8DF_FTYPE_V8DF_V8DI_INT_QI_INT:
35877 case V16SF_FTYPE_V16SF_V16SI_INT_HI_INT:
35878 case V2DF_FTYPE_V2DF_V2DI_INT_QI_INT:
35879 case V4SF_FTYPE_V4SF_V4SI_INT_QI_INT:
35880 nargs_constant = 3;
35883 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
35884 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
35885 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
35886 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
35887 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
35888 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
35889 case V8DF_FTYPE_V8DF_V8DI_INT_V8DF_QI_INT:
35890 case V16SF_FTYPE_V16SF_V16SI_INT_V16SF_HI_INT:
35891 case V2DF_FTYPE_V2DF_V2DI_INT_V2DF_QI_INT:
35892 case V4SF_FTYPE_V4SF_V4SI_INT_V4SF_QI_INT:
35894 nargs_constant = 4;
35897 gcc_unreachable ();
35899 gcc_assert (nargs <= ARRAY_SIZE (args));
35903 || GET_MODE (target) != tmode
35904 || !insn_p->operand[0].predicate (target, tmode))
35905 target = gen_reg_rtx (tmode);
35907 for (i = 0; i < nargs; i++)
35909 tree arg = CALL_EXPR_ARG (exp, i);
35910 rtx op = expand_normal (arg);
35911 machine_mode mode = insn_p->operand[i + 1].mode;
35912 bool match = insn_p->operand[i + 1].predicate (op, mode);
35914 if (i == nargs - nargs_constant)
35920 case CODE_FOR_avx512f_getmantv8df_mask_round:
35921 case CODE_FOR_avx512f_getmantv16sf_mask_round:
35922 case CODE_FOR_avx512f_vgetmantv2df_round:
35923 case CODE_FOR_avx512f_vgetmantv2df_mask_round:
35924 case CODE_FOR_avx512f_vgetmantv4sf_round:
35925 case CODE_FOR_avx512f_vgetmantv4sf_mask_round:
35926 error ("the immediate argument must be a 4-bit immediate");
35928 case CODE_FOR_avx512f_cmpv8df3_mask_round:
35929 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
35930 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
35931 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
35932 error ("the immediate argument must be a 5-bit immediate");
35935 error ("the immediate argument must be an 8-bit immediate");
35940 else if (i == nargs - 1)
35942 if (!insn_p->operand[nargs].predicate (op, SImode))
35944 error ("incorrect rounding operand");
35948 /* If there is no rounding, use the normal version of the pattern.  */
35949 if (INTVAL (op) == NO_ROUND)
35950 redundant_embed_rnd = 1;
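/* Illustrative sketch, assuming the usual encoding in which
   _MM_FROUND_CUR_DIRECTION equals NO_ROUND: a call such as
   _mm_add_round_sd (a, b, _MM_FROUND_CUR_DIRECTION) requests no
   explicit rounding, so the embedded-rounding wrapper is erased
   below and the plain pattern is emitted instead.  */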
35954 if (VECTOR_MODE_P (mode))
35955 op = safe_vector_operand (op, mode);
35957 op = fixup_modeless_constant (op, mode);
35959 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35961 if (optimize || !match)
35962 op = copy_to_mode_reg (mode, op);
35966 op = copy_to_reg (op);
35967 op = lowpart_subreg (mode, op, GET_MODE (op));
35972 args[i].mode = mode;
35978 pat = GEN_FCN (icode) (target, args[0].op);
35981 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35984 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35988 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35989 args[2].op, args[3].op);
35992 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35993 args[2].op, args[3].op, args[4].op);
35996 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35997 args[2].op, args[3].op, args[4].op,
36001 gcc_unreachable ();
36007 if (redundant_embed_rnd)
36008 pat = ix86_erase_embedded_rounding (pat);
36014 /* Subroutine of ix86_expand_builtin to take care of special insns
36015 with a variable number of operands.  */
36018 ix86_expand_special_args_builtin (const struct builtin_description *d,
36019 tree exp, rtx target)
36023 unsigned int i, nargs, arg_adjust, memory;
36024 bool aligned_mem = false;
36030 enum insn_code icode = d->icode;
36031 bool last_arg_constant = false;
36032 const struct insn_data_d *insn_p = &insn_data[icode];
36033 machine_mode tmode = insn_p->operand[0].mode;
36034 enum { load, store } klass;
36036 switch ((enum ix86_builtin_func_type) d->flag)
36038 case VOID_FTYPE_VOID:
36039 emit_insn (GEN_FCN (icode) (target));
36041 case VOID_FTYPE_UINT64:
36042 case VOID_FTYPE_UNSIGNED:
36048 case INT_FTYPE_VOID:
36049 case USHORT_FTYPE_VOID:
36050 case UINT64_FTYPE_VOID:
36051 case UINT_FTYPE_VOID:
36052 case UNSIGNED_FTYPE_VOID:
36057 case UINT64_FTYPE_PUNSIGNED:
36058 case V2DI_FTYPE_PV2DI:
36059 case V4DI_FTYPE_PV4DI:
36060 case V32QI_FTYPE_PCCHAR:
36061 case V16QI_FTYPE_PCCHAR:
36062 case V8SF_FTYPE_PCV4SF:
36063 case V8SF_FTYPE_PCFLOAT:
36064 case V4SF_FTYPE_PCFLOAT:
36065 case V4DF_FTYPE_PCV2DF:
36066 case V4DF_FTYPE_PCDOUBLE:
36067 case V2DF_FTYPE_PCDOUBLE:
36068 case VOID_FTYPE_PVOID:
36069 case V8DI_FTYPE_PV8DI:
36075 case CODE_FOR_sse4_1_movntdqa:
36076 case CODE_FOR_avx2_movntdqa:
36077 case CODE_FOR_avx512f_movntdqa:
36078 aligned_mem = true;
36084 case VOID_FTYPE_PV2SF_V4SF:
36085 case VOID_FTYPE_PV8DI_V8DI:
36086 case VOID_FTYPE_PV4DI_V4DI:
36087 case VOID_FTYPE_PV2DI_V2DI:
36088 case VOID_FTYPE_PCHAR_V32QI:
36089 case VOID_FTYPE_PCHAR_V16QI:
36090 case VOID_FTYPE_PFLOAT_V16SF:
36091 case VOID_FTYPE_PFLOAT_V8SF:
36092 case VOID_FTYPE_PFLOAT_V4SF:
36093 case VOID_FTYPE_PDOUBLE_V8DF:
36094 case VOID_FTYPE_PDOUBLE_V4DF:
36095 case VOID_FTYPE_PDOUBLE_V2DF:
36096 case VOID_FTYPE_PLONGLONG_LONGLONG:
36097 case VOID_FTYPE_PULONGLONG_ULONGLONG:
36098 case VOID_FTYPE_PUNSIGNED_UNSIGNED:
36099 case VOID_FTYPE_PINT_INT:
36102 /* Reserve memory operand for target. */
36103 memory = ARRAY_SIZE (args);
36106 /* These builtins and instructions require the memory
36107 to be properly aligned. */
36108 case CODE_FOR_avx_movntv4di:
36109 case CODE_FOR_sse2_movntv2di:
36110 case CODE_FOR_avx_movntv8sf:
36111 case CODE_FOR_sse_movntv4sf:
36112 case CODE_FOR_sse4a_vmmovntv4sf:
36113 case CODE_FOR_avx_movntv4df:
36114 case CODE_FOR_sse2_movntv2df:
36115 case CODE_FOR_sse4a_vmmovntv2df:
36116 case CODE_FOR_sse2_movntidi:
36117 case CODE_FOR_sse_movntq:
36118 case CODE_FOR_sse2_movntisi:
36119 case CODE_FOR_avx512f_movntv16sf:
36120 case CODE_FOR_avx512f_movntv8df:
36121 case CODE_FOR_avx512f_movntv8di:
36122 aligned_mem = true;
36128 case VOID_FTYPE_PVOID_PCVOID:
36134 case V4SF_FTYPE_V4SF_PCV2SF:
36135 case V2DF_FTYPE_V2DF_PCDOUBLE:
36140 case V8SF_FTYPE_PCV8SF_V8SI:
36141 case V4DF_FTYPE_PCV4DF_V4DI:
36142 case V4SF_FTYPE_PCV4SF_V4SI:
36143 case V2DF_FTYPE_PCV2DF_V2DI:
36144 case V8SI_FTYPE_PCV8SI_V8SI:
36145 case V4DI_FTYPE_PCV4DI_V4DI:
36146 case V4SI_FTYPE_PCV4SI_V4SI:
36147 case V2DI_FTYPE_PCV2DI_V2DI:
36148 case VOID_FTYPE_INT_INT64:
36153 case VOID_FTYPE_PV8DF_V8DF_UQI:
36154 case VOID_FTYPE_PV4DF_V4DF_UQI:
36155 case VOID_FTYPE_PV2DF_V2DF_UQI:
36156 case VOID_FTYPE_PV16SF_V16SF_UHI:
36157 case VOID_FTYPE_PV8SF_V8SF_UQI:
36158 case VOID_FTYPE_PV4SF_V4SF_UQI:
36159 case VOID_FTYPE_PV8DI_V8DI_UQI:
36160 case VOID_FTYPE_PV4DI_V4DI_UQI:
36161 case VOID_FTYPE_PV2DI_V2DI_UQI:
36162 case VOID_FTYPE_PV16SI_V16SI_UHI:
36163 case VOID_FTYPE_PV8SI_V8SI_UQI:
36164 case VOID_FTYPE_PV4SI_V4SI_UQI:
36165 case VOID_FTYPE_PV64QI_V64QI_UDI:
36166 case VOID_FTYPE_PV32HI_V32HI_USI:
36167 case VOID_FTYPE_PV32QI_V32QI_USI:
36168 case VOID_FTYPE_PV16QI_V16QI_UHI:
36169 case VOID_FTYPE_PV16HI_V16HI_UHI:
36170 case VOID_FTYPE_PV8HI_V8HI_UQI:
36173 /* These builtins and instructions require the memory
36174 to be properly aligned. */
36175 case CODE_FOR_avx512f_storev16sf_mask:
36176 case CODE_FOR_avx512f_storev16si_mask:
36177 case CODE_FOR_avx512f_storev8df_mask:
36178 case CODE_FOR_avx512f_storev8di_mask:
36179 case CODE_FOR_avx512vl_storev8sf_mask:
36180 case CODE_FOR_avx512vl_storev8si_mask:
36181 case CODE_FOR_avx512vl_storev4df_mask:
36182 case CODE_FOR_avx512vl_storev4di_mask:
36183 case CODE_FOR_avx512vl_storev4sf_mask:
36184 case CODE_FOR_avx512vl_storev4si_mask:
36185 case CODE_FOR_avx512vl_storev2df_mask:
36186 case CODE_FOR_avx512vl_storev2di_mask:
36187 aligned_mem = true;
36193 case VOID_FTYPE_PV8SF_V8SI_V8SF:
36194 case VOID_FTYPE_PV4DF_V4DI_V4DF:
36195 case VOID_FTYPE_PV4SF_V4SI_V4SF:
36196 case VOID_FTYPE_PV2DF_V2DI_V2DF:
36197 case VOID_FTYPE_PV8SI_V8SI_V8SI:
36198 case VOID_FTYPE_PV4DI_V4DI_V4DI:
36199 case VOID_FTYPE_PV4SI_V4SI_V4SI:
36200 case VOID_FTYPE_PV2DI_V2DI_V2DI:
36201 case VOID_FTYPE_PV8SI_V8DI_UQI:
36202 case VOID_FTYPE_PV8HI_V8DI_UQI:
36203 case VOID_FTYPE_PV16HI_V16SI_UHI:
36204 case VOID_FTYPE_PV16QI_V8DI_UQI:
36205 case VOID_FTYPE_PV16QI_V16SI_UHI:
36206 case VOID_FTYPE_PV4SI_V4DI_UQI:
36207 case VOID_FTYPE_PV4SI_V2DI_UQI:
36208 case VOID_FTYPE_PV8HI_V4DI_UQI:
36209 case VOID_FTYPE_PV8HI_V2DI_UQI:
36210 case VOID_FTYPE_PV8HI_V8SI_UQI:
36211 case VOID_FTYPE_PV8HI_V4SI_UQI:
36212 case VOID_FTYPE_PV16QI_V4DI_UQI:
36213 case VOID_FTYPE_PV16QI_V2DI_UQI:
36214 case VOID_FTYPE_PV16QI_V8SI_UQI:
36215 case VOID_FTYPE_PV16QI_V4SI_UQI:
36216 case VOID_FTYPE_PCHAR_V64QI_UDI:
36217 case VOID_FTYPE_PCHAR_V32QI_USI:
36218 case VOID_FTYPE_PCHAR_V16QI_UHI:
36219 case VOID_FTYPE_PSHORT_V32HI_USI:
36220 case VOID_FTYPE_PSHORT_V16HI_UHI:
36221 case VOID_FTYPE_PSHORT_V8HI_UQI:
36222 case VOID_FTYPE_PINT_V16SI_UHI:
36223 case VOID_FTYPE_PINT_V8SI_UQI:
36224 case VOID_FTYPE_PINT_V4SI_UQI:
36225 case VOID_FTYPE_PINT64_V8DI_UQI:
36226 case VOID_FTYPE_PINT64_V4DI_UQI:
36227 case VOID_FTYPE_PINT64_V2DI_UQI:
36228 case VOID_FTYPE_PDOUBLE_V8DF_UQI:
36229 case VOID_FTYPE_PDOUBLE_V4DF_UQI:
36230 case VOID_FTYPE_PDOUBLE_V2DF_UQI:
36231 case VOID_FTYPE_PFLOAT_V16SF_UHI:
36232 case VOID_FTYPE_PFLOAT_V8SF_UQI:
36233 case VOID_FTYPE_PFLOAT_V4SF_UQI:
36234 case VOID_FTYPE_PV32QI_V32HI_USI:
36235 case VOID_FTYPE_PV16QI_V16HI_UHI:
36236 case VOID_FTYPE_PV8QI_V8HI_UQI:
36239 /* Reserve memory operand for target. */
36240 memory = ARRAY_SIZE (args);
36242 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
36243 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
36244 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
36245 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
36246 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
36247 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
36248 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
36249 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
36250 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
36251 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
36252 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
36253 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
36254 case V64QI_FTYPE_PCV64QI_V64QI_UDI:
36255 case V32HI_FTYPE_PCV32HI_V32HI_USI:
36256 case V32QI_FTYPE_PCV32QI_V32QI_USI:
36257 case V16QI_FTYPE_PCV16QI_V16QI_UHI:
36258 case V16HI_FTYPE_PCV16HI_V16HI_UHI:
36259 case V8HI_FTYPE_PCV8HI_V8HI_UQI:
36262 /* These builtins and instructions require the memory
36263 to be properly aligned. */
36264 case CODE_FOR_avx512f_loadv16sf_mask:
36265 case CODE_FOR_avx512f_loadv16si_mask:
36266 case CODE_FOR_avx512f_loadv8df_mask:
36267 case CODE_FOR_avx512f_loadv8di_mask:
36268 case CODE_FOR_avx512vl_loadv8sf_mask:
36269 case CODE_FOR_avx512vl_loadv8si_mask:
36270 case CODE_FOR_avx512vl_loadv4df_mask:
36271 case CODE_FOR_avx512vl_loadv4di_mask:
36272 case CODE_FOR_avx512vl_loadv4sf_mask:
36273 case CODE_FOR_avx512vl_loadv4si_mask:
36274 case CODE_FOR_avx512vl_loadv2df_mask:
36275 case CODE_FOR_avx512vl_loadv2di_mask:
36276 case CODE_FOR_avx512bw_loadv64qi_mask:
36277 case CODE_FOR_avx512vl_loadv32qi_mask:
36278 case CODE_FOR_avx512vl_loadv16qi_mask:
36279 case CODE_FOR_avx512bw_loadv32hi_mask:
36280 case CODE_FOR_avx512vl_loadv16hi_mask:
36281 case CODE_FOR_avx512vl_loadv8hi_mask:
36282 aligned_mem = true;
36288 case V64QI_FTYPE_PCCHAR_V64QI_UDI:
36289 case V32QI_FTYPE_PCCHAR_V32QI_USI:
36290 case V16QI_FTYPE_PCCHAR_V16QI_UHI:
36291 case V32HI_FTYPE_PCSHORT_V32HI_USI:
36292 case V16HI_FTYPE_PCSHORT_V16HI_UHI:
36293 case V8HI_FTYPE_PCSHORT_V8HI_UQI:
36294 case V16SI_FTYPE_PCINT_V16SI_UHI:
36295 case V8SI_FTYPE_PCINT_V8SI_UQI:
36296 case V4SI_FTYPE_PCINT_V4SI_UQI:
36297 case V8DI_FTYPE_PCINT64_V8DI_UQI:
36298 case V4DI_FTYPE_PCINT64_V4DI_UQI:
36299 case V2DI_FTYPE_PCINT64_V2DI_UQI:
36300 case V8DF_FTYPE_PCDOUBLE_V8DF_UQI:
36301 case V4DF_FTYPE_PCDOUBLE_V4DF_UQI:
36302 case V2DF_FTYPE_PCDOUBLE_V2DF_UQI:
36303 case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
36304 case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
36305 case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
36310 case VOID_FTYPE_UINT_UINT_UINT:
36311 case VOID_FTYPE_UINT64_UINT_UINT:
36312 case UCHAR_FTYPE_UINT_UINT_UINT:
36313 case UCHAR_FTYPE_UINT64_UINT_UINT:
36316 memory = ARRAY_SIZE (args);
36317 last_arg_constant = true;
36320 gcc_unreachable ();
36323 gcc_assert (nargs <= ARRAY_SIZE (args));
36325 if (klass == store)
36327 arg = CALL_EXPR_ARG (exp, 0);
36328 op = expand_normal (arg);
36329 gcc_assert (target == 0);
36332 op = ix86_zero_extend_to_Pmode (op);
36333 target = gen_rtx_MEM (tmode, op);
36334 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
36335 on it. Try to improve it using get_pointer_alignment,
36336 and if the special builtin is one that requires strict
36337 mode alignment, also from its GET_MODE_ALIGNMENT.
36338 Failure to do so could lead to ix86_legitimate_combined_insn
36339 rejecting all changes to such insns. */
36340 unsigned int align = get_pointer_alignment (arg);
36341 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
36342 align = GET_MODE_ALIGNMENT (tmode);
36343 if (MEM_ALIGN (target) < align)
36344 set_mem_align (target, align);
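/* Worked example, not from the original source: for a non-temporal
   store such as CODE_FOR_sse_movntv4sf, tmode is V4SFmode, so even
   when the pointer argument carries no alignment information the
   MEM's alignment is raised to GET_MODE_ALIGNMENT (V4SFmode), i.e.
   128 bits, matching the insn's strict-alignment requirement.  */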
36347 target = force_reg (tmode, op);
36355 || !register_operand (target, tmode)
36356 || GET_MODE (target) != tmode)
36357 target = gen_reg_rtx (tmode);
36360 for (i = 0; i < nargs; i++)
36362 machine_mode mode = insn_p->operand[i + 1].mode;
36365 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
36366 op = expand_normal (arg);
36367 match = insn_p->operand[i + 1].predicate (op, mode);
36369 if (last_arg_constant && (i + 1) == nargs)
36373 if (icode == CODE_FOR_lwp_lwpvalsi3
36374 || icode == CODE_FOR_lwp_lwpinssi3
36375 || icode == CODE_FOR_lwp_lwpvaldi3
36376 || icode == CODE_FOR_lwp_lwpinsdi3)
36377 error ("the last argument must be a 32-bit immediate");
36379 error ("the last argument must be an 8-bit immediate");
36387 /* This must be the memory operand. */
36388 op = ix86_zero_extend_to_Pmode (op);
36389 op = gen_rtx_MEM (mode, op);
36390 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
36391 on it. Try to improve it using get_pointer_alignment,
36392 and if the special builtin is one that requires strict
36393 mode alignment, also from its GET_MODE_ALIGNMENT.
36394 Failure to do so could lead to ix86_legitimate_combined_insn
36395 rejecting all changes to such insns. */
36396 unsigned int align = get_pointer_alignment (arg);
36397 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
36398 align = GET_MODE_ALIGNMENT (mode);
36399 if (MEM_ALIGN (op) < align)
36400 set_mem_align (op, align);
36404 /* This must be a register.  */
36405 if (VECTOR_MODE_P (mode))
36406 op = safe_vector_operand (op, mode);
36408 op = fixup_modeless_constant (op, mode);
36410 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
36411 op = copy_to_mode_reg (mode, op);
36414 op = copy_to_reg (op);
36415 op = lowpart_subreg (mode, op, GET_MODE (op));
36421 args[i].mode = mode;
36427 pat = GEN_FCN (icode) (target);
36430 pat = GEN_FCN (icode) (target, args[0].op);
36433 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36436 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36439 gcc_unreachable ();
36445 return klass == store ? 0 : target;
36448 /* Return the integer constant in ARG. Constrain it to be in the range
36449 of the subparts of VEC_TYPE; issue an error if not. */
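/* Illustrative example, not in the original source: for a V4SF vector
   type TYPE_VECTOR_SUBPARTS is 4, so max is 3 and a selector argument
   of 4 triggers the range error below.  */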
36452 get_element_number (tree vec_type, tree arg)
36454 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
36456 if (!tree_fits_uhwi_p (arg)
36457 || (elt = tree_to_uhwi (arg), elt > max))
36459 error ("selector must be an integer constant in the range 0..%wi", max);
36466 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
36467 ix86_expand_vector_init. We DO have language-level syntax for this, in
36468 the form of (type){ init-list }. Except that since we can't place emms
36469 instructions from inside the compiler, we can't allow the use of MMX
36470 registers unless the user explicitly asks for it. So we do *not* define
36471 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
36472 we have builtins invoked by mmintrin.h that give us license to emit
36473 these sorts of instructions. */
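/* Illustrative example, not in the original source: a wrapper such as
   mmintrin.h's _mm_set_pi32 expands through one of these builtins and
   behaves like the initializer (__v2si){ __i0, __i1 }, without the
   compiler ever spontaneously allocating MMX registers.  */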
36476 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
36478 machine_mode tmode = TYPE_MODE (type);
36479 machine_mode inner_mode = GET_MODE_INNER (tmode);
36480 int i, n_elt = GET_MODE_NUNITS (tmode);
36481 rtvec v = rtvec_alloc (n_elt);
36483 gcc_assert (VECTOR_MODE_P (tmode));
36484 gcc_assert (call_expr_nargs (exp) == n_elt);
36486 for (i = 0; i < n_elt; ++i)
36488 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
36489 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
36492 if (!target || !register_operand (target, tmode))
36493 target = gen_reg_rtx (tmode);
36495 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
36499 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
36500 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
36501 had a language-level syntax for referencing vector elements. */
36504 ix86_expand_vec_ext_builtin (tree exp, rtx target)
36506 machine_mode tmode, mode0;
36511 arg0 = CALL_EXPR_ARG (exp, 0);
36512 arg1 = CALL_EXPR_ARG (exp, 1);
36514 op0 = expand_normal (arg0);
36515 elt = get_element_number (TREE_TYPE (arg0), arg1);
36517 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
36518 mode0 = TYPE_MODE (TREE_TYPE (arg0));
36519 gcc_assert (VECTOR_MODE_P (mode0));
36521 op0 = force_reg (mode0, op0);
36523 if (optimize || !target || !register_operand (target, tmode))
36524 target = gen_reg_rtx (tmode);
36526 ix86_expand_vector_extract (true, target, op0, elt);
36531 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
36532 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
36533 a language-level syntax for referencing vector elements. */
36536 ix86_expand_vec_set_builtin (tree exp)
36538 machine_mode tmode, mode1;
36539 tree arg0, arg1, arg2;
36541 rtx op0, op1, target;
36543 arg0 = CALL_EXPR_ARG (exp, 0);
36544 arg1 = CALL_EXPR_ARG (exp, 1);
36545 arg2 = CALL_EXPR_ARG (exp, 2);
36547 tmode = TYPE_MODE (TREE_TYPE (arg0));
36548 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
36549 gcc_assert (VECTOR_MODE_P (tmode));
36551 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
36552 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
36553 elt = get_element_number (TREE_TYPE (arg0), arg2);
36555 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
36556 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
36558 op0 = force_reg (tmode, op0);
36559 op1 = force_reg (mode1, op1);
36561 /* OP0 is the source of these builtin functions and shouldn't be
36562 modified.  Create a copy, use it, and return it as the target.  */
36563 target = gen_reg_rtx (tmode);
36564 emit_move_insn (target, op0);
36565 ix86_expand_vector_set (true, target, op1, elt);
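/* Illustrative consequence, not in the original source: an intrinsic
   such as _mm_insert_epi16 (v, x, 2), which reaches this expander,
   leaves v itself unmodified and yields a fresh vector whose element
   2 is x.  */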
36570 /* Expand an expression EXP that calls a built-in function,
36571 with result going to TARGET if that's convenient
36572 (and in mode MODE if that's convenient).
36573 SUBTARGET may be used as the target for computing one of EXP's operands.
36574 IGNORE is nonzero if the value is to be ignored. */
36577 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
36578 machine_mode mode, int ignore)
36581 enum insn_code icode, icode2;
36582 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
36583 tree arg0, arg1, arg2, arg3, arg4;
36584 rtx op0, op1, op2, op3, op4, pat, pat2, insn;
36585 machine_mode mode0, mode1, mode2, mode3, mode4;
36586 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
36588 /* For CPU builtins that can be folded, fold first and expand the fold. */
36591 case IX86_BUILTIN_CPU_INIT:
36593 /* Make it call __cpu_indicator_init in libgcc. */
36594 tree call_expr, fndecl, type;
36595 type = build_function_type_list (integer_type_node, NULL_TREE);
36596 fndecl = build_fn_decl ("__cpu_indicator_init", type);
36597 call_expr = build_call_expr (fndecl, 0);
36598 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
36600 case IX86_BUILTIN_CPU_IS:
36601 case IX86_BUILTIN_CPU_SUPPORTS:
36603 tree arg0 = CALL_EXPR_ARG (exp, 0);
36604 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
36605 gcc_assert (fold_expr != NULL_TREE);
36606 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
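/* Illustrative note, not in the original source: e.g.
   __builtin_cpu_is ("intel") folds to a comparison against the
   __cpu_model data that __cpu_indicator_init filled in, so nothing
   target-specific needs to be expanded here.  */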
36610 HOST_WIDE_INT isa = ix86_isa_flags;
36611 HOST_WIDE_INT isa2 = ix86_isa_flags2;
36612 HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa;
36613 HOST_WIDE_INT bisa2 = ix86_builtins_isa[fcode].isa2;
36614 /* The general case is that we require all the ISAs specified in bisa{,2} to be enabled.
36616 The exceptions are:
36617 OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
36618 OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32
36619 OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4
36620 where for each such pair it is sufficient if either of the ISAs is
36621 enabled; and if the pair is ORed with other options, those others must be enabled too.  */
36622 if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
36623 == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
36624 && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) != 0)
36625 isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A);
36626 if (((bisa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32))
36627 == (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32))
36628 && (isa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) != 0)
36629 isa |= (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32);
36630 if (((bisa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
36631 == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
36632 && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
36633 isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
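/* Worked example, not in the original source: a builtin whose mask is
   exactly OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4 is accepted when
   either -mfma or -mfma4 is enabled; the isa |= above then makes
   (bisa & isa) == bisa, so the error path below is skipped.  */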
36634 if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2)
36636 char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
36637 (enum fpmath_unit) 0, false);
36639 error ("%qE needs unknown isa option", fndecl);
36642 gcc_assert (opts != NULL);
36643 error ("%qE needs isa option %s", fndecl, opts);
36646 return expand_call (exp, target, ignore);
36651 case IX86_BUILTIN_MASKMOVQ:
36652 case IX86_BUILTIN_MASKMOVDQU:
36653 icode = (fcode == IX86_BUILTIN_MASKMOVQ
36654 ? CODE_FOR_mmx_maskmovq
36655 : CODE_FOR_sse2_maskmovdqu);
36656 /* Note the arg order is different from the operand order. */
36657 arg1 = CALL_EXPR_ARG (exp, 0);
36658 arg2 = CALL_EXPR_ARG (exp, 1);
36659 arg0 = CALL_EXPR_ARG (exp, 2);
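/* Illustrative reordering, not in the original source: for
   __builtin_ia32_maskmovq (src, mask, p) the third argument p becomes
   insn operand 0 (the memory destination), while src and mask become
   operands 1 and 2.  */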
36660 op0 = expand_normal (arg0);
36661 op1 = expand_normal (arg1);
36662 op2 = expand_normal (arg2);
36663 mode0 = insn_data[icode].operand[0].mode;
36664 mode1 = insn_data[icode].operand[1].mode;
36665 mode2 = insn_data[icode].operand[2].mode;
36667 op0 = ix86_zero_extend_to_Pmode (op0);
36668 op0 = gen_rtx_MEM (mode1, op0);
36670 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36671 op0 = copy_to_mode_reg (mode0, op0);
36672 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36673 op1 = copy_to_mode_reg (mode1, op1);
36674 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36675 op2 = copy_to_mode_reg (mode2, op2);
36676 pat = GEN_FCN (icode) (op0, op1, op2);
36682 case IX86_BUILTIN_LDMXCSR:
36683 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
36684 target = assign_386_stack_local (SImode, SLOT_TEMP);
36685 emit_move_insn (target, op0);
36686 emit_insn (gen_sse_ldmxcsr (target));
36689 case IX86_BUILTIN_STMXCSR:
36690 target = assign_386_stack_local (SImode, SLOT_TEMP);
36691 emit_insn (gen_sse_stmxcsr (target));
36692 return copy_to_mode_reg (SImode, target);
36694 case IX86_BUILTIN_CLFLUSH:
36695 arg0 = CALL_EXPR_ARG (exp, 0);
36696 op0 = expand_normal (arg0);
36697 icode = CODE_FOR_sse2_clflush;
36698 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36699 op0 = ix86_zero_extend_to_Pmode (op0);
36701 emit_insn (gen_sse2_clflush (op0));
36704 case IX86_BUILTIN_CLWB:
36705 arg0 = CALL_EXPR_ARG (exp, 0);
36706 op0 = expand_normal (arg0);
36707 icode = CODE_FOR_clwb;
36708 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36709 op0 = ix86_zero_extend_to_Pmode (op0);
36711 emit_insn (gen_clwb (op0));
36714 case IX86_BUILTIN_CLFLUSHOPT:
36715 arg0 = CALL_EXPR_ARG (exp, 0);
36716 op0 = expand_normal (arg0);
36717 icode = CODE_FOR_clflushopt;
36718 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36719 op0 = ix86_zero_extend_to_Pmode (op0);
36721 emit_insn (gen_clflushopt (op0));
36724 case IX86_BUILTIN_MONITOR:
36725 case IX86_BUILTIN_MONITORX:
36726 arg0 = CALL_EXPR_ARG (exp, 0);
36727 arg1 = CALL_EXPR_ARG (exp, 1);
36728 arg2 = CALL_EXPR_ARG (exp, 2);
36729 op0 = expand_normal (arg0);
36730 op1 = expand_normal (arg1);
36731 op2 = expand_normal (arg2);
36733 op0 = ix86_zero_extend_to_Pmode (op0);
36735 op1 = copy_to_mode_reg (SImode, op1);
36737 op2 = copy_to_mode_reg (SImode, op2);
36739 emit_insn (fcode == IX86_BUILTIN_MONITOR
36740 ? ix86_gen_monitor (op0, op1, op2)
36741 : ix86_gen_monitorx (op0, op1, op2));
36744 case IX86_BUILTIN_MWAIT:
36745 arg0 = CALL_EXPR_ARG (exp, 0);
36746 arg1 = CALL_EXPR_ARG (exp, 1);
36747 op0 = expand_normal (arg0);
36748 op1 = expand_normal (arg1);
36750 op0 = copy_to_mode_reg (SImode, op0);
36752 op1 = copy_to_mode_reg (SImode, op1);
36753 emit_insn (gen_sse3_mwait (op0, op1));
36756 case IX86_BUILTIN_MWAITX:
36757 arg0 = CALL_EXPR_ARG (exp, 0);
36758 arg1 = CALL_EXPR_ARG (exp, 1);
36759 arg2 = CALL_EXPR_ARG (exp, 2);
36760 op0 = expand_normal (arg0);
36761 op1 = expand_normal (arg1);
36762 op2 = expand_normal (arg2);
36764 op0 = copy_to_mode_reg (SImode, op0);
36766 op1 = copy_to_mode_reg (SImode, op1);
36768 op2 = copy_to_mode_reg (SImode, op2);
36769 emit_insn (gen_mwaitx (op0, op1, op2));
36772 case IX86_BUILTIN_UMONITOR:
36773 arg0 = CALL_EXPR_ARG (exp, 0);
36774 op0 = expand_normal (arg0);
36776 op0 = ix86_zero_extend_to_Pmode (op0);
36778 insn = (TARGET_64BIT
36779 ? gen_umonitor_di (op0)
36780 : gen_umonitor_si (op0));
36785 case IX86_BUILTIN_UMWAIT:
36786 case IX86_BUILTIN_TPAUSE:
36787 arg0 = CALL_EXPR_ARG (exp, 0);
36788 arg1 = CALL_EXPR_ARG (exp, 1);
36789 op0 = expand_normal (arg0);
36790 op1 = expand_normal (arg1);
36793 op0 = copy_to_mode_reg (SImode, op0);
36795 op1 = force_reg (DImode, op1);
36799 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
36800 NULL, 1, OPTAB_DIRECT);
36803 case IX86_BUILTIN_UMWAIT:
36804 icode = CODE_FOR_umwait_rex64;
36806 case IX86_BUILTIN_TPAUSE:
36807 icode = CODE_FOR_tpause_rex64;
36810 gcc_unreachable ();
36813 op2 = gen_lowpart (SImode, op2);
36814 op1 = gen_lowpart (SImode, op1);
36815 pat = GEN_FCN (icode) (op0, op1, op2);
36821 case IX86_BUILTIN_UMWAIT:
36822 icode = CODE_FOR_umwait;
36824 case IX86_BUILTIN_TPAUSE:
36825 icode = CODE_FOR_tpause;
36828 gcc_unreachable ();
36830 pat = GEN_FCN (icode) (op0, op1);
36839 || !register_operand (target, QImode))
36840 target = gen_reg_rtx (QImode);
36842 pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
36844 emit_insn (gen_rtx_SET (target, pat));
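/* Illustrative note: both _umwait and _tpause report a status bit via
   CF, which per the ISA definition is set when the wait ended because
   the OS-imposed deadline expired; the flag-to-QImode set above
   returns that bit as the builtin's result.  */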
36848 case IX86_BUILTIN_CLZERO:
36849 arg0 = CALL_EXPR_ARG (exp, 0);
36850 op0 = expand_normal (arg0);
36852 op0 = ix86_zero_extend_to_Pmode (op0);
36853 emit_insn (ix86_gen_clzero (op0));
36856 case IX86_BUILTIN_CLDEMOTE:
36857 arg0 = CALL_EXPR_ARG (exp, 0);
36858 op0 = expand_normal (arg0);
36859 icode = CODE_FOR_cldemote;
36860 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36861 op0 = ix86_zero_extend_to_Pmode (op0);
36863 emit_insn (gen_cldemote (op0));
36866 case IX86_BUILTIN_VEC_INIT_V2SI:
36867 case IX86_BUILTIN_VEC_INIT_V4HI:
36868 case IX86_BUILTIN_VEC_INIT_V8QI:
36869 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
36871 case IX86_BUILTIN_VEC_EXT_V2DF:
36872 case IX86_BUILTIN_VEC_EXT_V2DI:
36873 case IX86_BUILTIN_VEC_EXT_V4SF:
36874 case IX86_BUILTIN_VEC_EXT_V4SI:
36875 case IX86_BUILTIN_VEC_EXT_V8HI:
36876 case IX86_BUILTIN_VEC_EXT_V2SI:
36877 case IX86_BUILTIN_VEC_EXT_V4HI:
36878 case IX86_BUILTIN_VEC_EXT_V16QI:
36879 return ix86_expand_vec_ext_builtin (exp, target);
36881 case IX86_BUILTIN_VEC_SET_V2DI:
36882 case IX86_BUILTIN_VEC_SET_V4SF:
36883 case IX86_BUILTIN_VEC_SET_V4SI:
36884 case IX86_BUILTIN_VEC_SET_V8HI:
36885 case IX86_BUILTIN_VEC_SET_V4HI:
36886 case IX86_BUILTIN_VEC_SET_V16QI:
36887 return ix86_expand_vec_set_builtin (exp);
36889 case IX86_BUILTIN_NANQ:
36890 case IX86_BUILTIN_NANSQ:
36891 return expand_call (exp, target, ignore);
36893 case IX86_BUILTIN_RDPID:
36895 op0 = gen_reg_rtx (word_mode);
36899 insn = gen_rdpid_rex64 (op0);
36900 op0 = convert_to_mode (SImode, op0, 1);
36903 insn = gen_rdpid (op0);
36908 || !register_operand (target, SImode))
36909 target = gen_reg_rtx (SImode);
36911 emit_move_insn (target, op0);
36914 case IX86_BUILTIN_RDPMC:
36915 case IX86_BUILTIN_RDTSC:
36916 case IX86_BUILTIN_RDTSCP:
36917 case IX86_BUILTIN_XGETBV:
36919 op0 = gen_reg_rtx (DImode);
36920 op1 = gen_reg_rtx (DImode);
36922 if (fcode == IX86_BUILTIN_RDPMC)
36924 arg0 = CALL_EXPR_ARG (exp, 0);
36925 op2 = expand_normal (arg0);
36926 if (!register_operand (op2, SImode))
36927 op2 = copy_to_mode_reg (SImode, op2);
36929 insn = (TARGET_64BIT
36930 ? gen_rdpmc_rex64 (op0, op1, op2)
36931 : gen_rdpmc (op0, op2));
36934 else if (fcode == IX86_BUILTIN_XGETBV)
36936 arg0 = CALL_EXPR_ARG (exp, 0);
36937 op2 = expand_normal (arg0);
36938 if (!register_operand (op2, SImode))
36939 op2 = copy_to_mode_reg (SImode, op2);
36941 insn = (TARGET_64BIT
36942 ? gen_xgetbv_rex64 (op0, op1, op2)
36943 : gen_xgetbv (op0, op2));
36946 else if (fcode == IX86_BUILTIN_RDTSC)
36948 insn = (TARGET_64BIT
36949 ? gen_rdtsc_rex64 (op0, op1)
36950 : gen_rdtsc (op0));
36955 op2 = gen_reg_rtx (SImode);
36957 insn = (TARGET_64BIT
36958 ? gen_rdtscp_rex64 (op0, op1, op2)
36959 : gen_rdtscp (op0, op2));
36962 arg0 = CALL_EXPR_ARG (exp, 0);
36963 op4 = expand_normal (arg0);
36964 if (!address_operand (op4, VOIDmode))
36966 op4 = convert_memory_address (Pmode, op4);
36967 op4 = copy_addr_to_reg (op4);
36969 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
36973 || !register_operand (target, DImode))
36974 target = gen_reg_rtx (DImode);
36978 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
36979 op1, 1, OPTAB_DIRECT);
36980 op0 = expand_simple_binop (DImode, IOR, op0, op1,
36981 op0, 1, OPTAB_DIRECT);
36984 emit_move_insn (target, op0);
36987 case IX86_BUILTIN_MOVDIR64B:
36989 arg0 = CALL_EXPR_ARG (exp, 0);
36990 arg1 = CALL_EXPR_ARG (exp, 1);
36991 op0 = expand_normal (arg0);
36992 op1 = expand_normal (arg1);
36994 op0 = ix86_zero_extend_to_Pmode (op0);
36995 if (!address_operand (op1, VOIDmode))
36997 op1 = convert_memory_address (Pmode, op1);
36998 op1 = copy_addr_to_reg (op1);
37000 op1 = gen_rtx_MEM (XImode, op1);
37002 insn = (TARGET_64BIT
37003 ? gen_movdir64b_di (op0, op1)
37004 : gen_movdir64b_si (op0, op1));
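/* Illustrative note: movdir64b performs a 64-byte direct store -- it
   reads 64 bytes (hence the XImode MEM above) from the source memory
   operand and writes them to the destination address as a single
   64-byte write.  */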
37008 case IX86_BUILTIN_FXSAVE:
37009 case IX86_BUILTIN_FXRSTOR:
37010 case IX86_BUILTIN_FXSAVE64:
37011 case IX86_BUILTIN_FXRSTOR64:
37012 case IX86_BUILTIN_FNSTENV:
37013 case IX86_BUILTIN_FLDENV:
37017 case IX86_BUILTIN_FXSAVE:
37018 icode = CODE_FOR_fxsave;
37020 case IX86_BUILTIN_FXRSTOR:
37021 icode = CODE_FOR_fxrstor;
37023 case IX86_BUILTIN_FXSAVE64:
37024 icode = CODE_FOR_fxsave64;
37026 case IX86_BUILTIN_FXRSTOR64:
37027 icode = CODE_FOR_fxrstor64;
37029 case IX86_BUILTIN_FNSTENV:
37030 icode = CODE_FOR_fnstenv;
37032 case IX86_BUILTIN_FLDENV:
37033 icode = CODE_FOR_fldenv;
37036 gcc_unreachable ();
37039 arg0 = CALL_EXPR_ARG (exp, 0);
37040 op0 = expand_normal (arg0);
37042 if (!address_operand (op0, VOIDmode))
37044 op0 = convert_memory_address (Pmode, op0);
37045 op0 = copy_addr_to_reg (op0);
37047 op0 = gen_rtx_MEM (mode0, op0);
37049 pat = GEN_FCN (icode) (op0);
37054 case IX86_BUILTIN_XSETBV:
37055 arg0 = CALL_EXPR_ARG (exp, 0);
37056 arg1 = CALL_EXPR_ARG (exp, 1);
37057 op0 = expand_normal (arg0);
37058 op1 = expand_normal (arg1);
37061 op0 = copy_to_mode_reg (SImode, op0);
37063 op1 = force_reg (DImode, op1);
37067 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
37068 NULL, 1, OPTAB_DIRECT);
37070 icode = CODE_FOR_xsetbv_rex64;
37072 op2 = gen_lowpart (SImode, op2);
37073 op1 = gen_lowpart (SImode, op1);
37074 pat = GEN_FCN (icode) (op0, op1, op2);
37078 icode = CODE_FOR_xsetbv;
37080 pat = GEN_FCN (icode) (op0, op1);
37086 case IX86_BUILTIN_XSAVE:
37087 case IX86_BUILTIN_XRSTOR:
37088 case IX86_BUILTIN_XSAVE64:
37089 case IX86_BUILTIN_XRSTOR64:
37090 case IX86_BUILTIN_XSAVEOPT:
37091 case IX86_BUILTIN_XSAVEOPT64:
37092 case IX86_BUILTIN_XSAVES:
37093 case IX86_BUILTIN_XRSTORS:
37094 case IX86_BUILTIN_XSAVES64:
37095 case IX86_BUILTIN_XRSTORS64:
37096 case IX86_BUILTIN_XSAVEC:
37097 case IX86_BUILTIN_XSAVEC64:
37098 arg0 = CALL_EXPR_ARG (exp, 0);
37099 arg1 = CALL_EXPR_ARG (exp, 1);
37100 op0 = expand_normal (arg0);
37101 op1 = expand_normal (arg1);
37103 if (!address_operand (op0, VOIDmode))
37105 op0 = convert_memory_address (Pmode, op0);
37106 op0 = copy_addr_to_reg (op0);
37108 op0 = gen_rtx_MEM (BLKmode, op0);
37110 op1 = force_reg (DImode, op1);
37114 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
37115 NULL, 1, OPTAB_DIRECT);
37118 case IX86_BUILTIN_XSAVE:
37119 icode = CODE_FOR_xsave_rex64;
37121 case IX86_BUILTIN_XRSTOR:
37122 icode = CODE_FOR_xrstor_rex64;
37124 case IX86_BUILTIN_XSAVE64:
37125 icode = CODE_FOR_xsave64;
37127 case IX86_BUILTIN_XRSTOR64:
37128 icode = CODE_FOR_xrstor64;
37130 case IX86_BUILTIN_XSAVEOPT:
37131 icode = CODE_FOR_xsaveopt_rex64;
37133 case IX86_BUILTIN_XSAVEOPT64:
37134 icode = CODE_FOR_xsaveopt64;
37136 case IX86_BUILTIN_XSAVES:
37137 icode = CODE_FOR_xsaves_rex64;
37139 case IX86_BUILTIN_XRSTORS:
37140 icode = CODE_FOR_xrstors_rex64;
37142 case IX86_BUILTIN_XSAVES64:
37143 icode = CODE_FOR_xsaves64;
37145 case IX86_BUILTIN_XRSTORS64:
37146 icode = CODE_FOR_xrstors64;
37148 case IX86_BUILTIN_XSAVEC:
37149 icode = CODE_FOR_xsavec_rex64;
37151 case IX86_BUILTIN_XSAVEC64:
37152 icode = CODE_FOR_xsavec64;
37155 gcc_unreachable ();
37158 op2 = gen_lowpart (SImode, op2);
37159 op1 = gen_lowpart (SImode, op1);
37160 pat = GEN_FCN (icode) (op0, op1, op2);
37166 case IX86_BUILTIN_XSAVE:
37167 icode = CODE_FOR_xsave;
37169 case IX86_BUILTIN_XRSTOR:
37170 icode = CODE_FOR_xrstor;
37172 case IX86_BUILTIN_XSAVEOPT:
37173 icode = CODE_FOR_xsaveopt;
37175 case IX86_BUILTIN_XSAVES:
37176 icode = CODE_FOR_xsaves;
37178 case IX86_BUILTIN_XRSTORS:
37179 icode = CODE_FOR_xrstors;
37181 case IX86_BUILTIN_XSAVEC:
37182 icode = CODE_FOR_xsavec;
37185 gcc_unreachable ();
37187 pat = GEN_FCN (icode) (op0, op1);
37194 case IX86_BUILTIN_LLWPCB:
37195 arg0 = CALL_EXPR_ARG (exp, 0);
37196 op0 = expand_normal (arg0);
37197 icode = CODE_FOR_lwp_llwpcb;
37198 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
37199 op0 = ix86_zero_extend_to_Pmode (op0);
37200 emit_insn (gen_lwp_llwpcb (op0));
37203 case IX86_BUILTIN_SLWPCB:
37204 icode = CODE_FOR_lwp_slwpcb;
37206 || !insn_data[icode].operand[0].predicate (target, Pmode))
37207 target = gen_reg_rtx (Pmode);
37208 emit_insn (gen_lwp_slwpcb (target));
37211 case IX86_BUILTIN_BEXTRI32:
37212 case IX86_BUILTIN_BEXTRI64:
37213 arg0 = CALL_EXPR_ARG (exp, 0);
37214 arg1 = CALL_EXPR_ARG (exp, 1);
37215 op0 = expand_normal (arg0);
37216 op1 = expand_normal (arg1);
37217 icode = (fcode == IX86_BUILTIN_BEXTRI32
37218 ? CODE_FOR_tbm_bextri_si
37219 : CODE_FOR_tbm_bextri_di);
37220 if (!CONST_INT_P (op1))
37222 error ("last argument must be an immediate");
37227 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
37228 unsigned char lsb_index = INTVAL (op1) & 0xFF;
37229 op1 = GEN_INT (length);
37230 op2 = GEN_INT (lsb_index);
37231 pat = GEN_FCN (icode) (target, op0, op1, op2);
37237 case IX86_BUILTIN_RDRAND16_STEP:
37238 icode = CODE_FOR_rdrandhi_1;
37242 case IX86_BUILTIN_RDRAND32_STEP:
37243 icode = CODE_FOR_rdrandsi_1;
37247 case IX86_BUILTIN_RDRAND64_STEP:
37248 icode = CODE_FOR_rdranddi_1;
37252 arg0 = CALL_EXPR_ARG (exp, 0);
37253 op1 = expand_normal (arg0);
37254 if (!address_operand (op1, VOIDmode))
37256 op1 = convert_memory_address (Pmode, op1);
37257 op1 = copy_addr_to_reg (op1);
37260 op0 = gen_reg_rtx (mode0);
37261 emit_insn (GEN_FCN (icode) (op0));
37263 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
37265 op1 = gen_reg_rtx (SImode);
37266 emit_move_insn (op1, CONST1_RTX (SImode));
37268 /* Emit SImode conditional move. */
37269 if (mode0 == HImode)
37271 if (TARGET_ZERO_EXTEND_WITH_AND
37272 && optimize_function_for_speed_p (cfun))
37274 op2 = force_reg (SImode, const0_rtx);
37276 emit_insn (gen_movstricthi
37277 (gen_lowpart (HImode, op2), op0));
37281 op2 = gen_reg_rtx (SImode);
37283 emit_insn (gen_zero_extendhisi2 (op2, op0));
37286 else if (mode0 == SImode)
37289 op2 = gen_rtx_SUBREG (SImode, op0, 0);
37292 || !register_operand (target, SImode))
37293 target = gen_reg_rtx (SImode);
37295 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
37297 emit_insn (gen_rtx_SET (target,
37298 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
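/* Usage sketch (illustrative, not original source):
     unsigned int v;
     int ok = __builtin_ia32_rdrand32_step (&v);
   RDRAND clears its destination register when it fails (CF == 0), so
   the conditional move above yields the constant 1 on success and the
   zeroed result, i.e. 0, on failure.  */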
37301 case IX86_BUILTIN_RDSEED16_STEP:
37302 icode = CODE_FOR_rdseedhi_1;
37306 case IX86_BUILTIN_RDSEED32_STEP:
37307 icode = CODE_FOR_rdseedsi_1;
37311 case IX86_BUILTIN_RDSEED64_STEP:
37312 icode = CODE_FOR_rdseeddi_1;
37316 arg0 = CALL_EXPR_ARG (exp, 0);
37317 op1 = expand_normal (arg0);
37318 if (!address_operand (op1, VOIDmode))
37320 op1 = convert_memory_address (Pmode, op1);
37321 op1 = copy_addr_to_reg (op1);
37324 op0 = gen_reg_rtx (mode0);
37325 emit_insn (GEN_FCN (icode) (op0));
37327 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
37329 op2 = gen_reg_rtx (QImode);
37331 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
37333 emit_insn (gen_rtx_SET (op2, pat));
37336 || !register_operand (target, SImode))
37337 target = gen_reg_rtx (SImode);
37339 emit_insn (gen_zero_extendqisi2 (target, op2));
37342 case IX86_BUILTIN_SBB32:
37343 icode = CODE_FOR_subborrowsi;
37344 icode2 = CODE_FOR_subborrowsi_0;
37350 case IX86_BUILTIN_SBB64:
37351 icode = CODE_FOR_subborrowdi;
37352 icode2 = CODE_FOR_subborrowdi_0;
37358 case IX86_BUILTIN_ADDCARRYX32:
37359 icode = CODE_FOR_addcarrysi;
37360 icode2 = CODE_FOR_addcarrysi_0;
37366 case IX86_BUILTIN_ADDCARRYX64:
37367 icode = CODE_FOR_addcarrydi;
37368 icode2 = CODE_FOR_addcarrydi_0;
37374 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
37375 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
37376 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
37377 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
37379 op1 = expand_normal (arg0);
37380 if (!integer_zerop (arg0))
37381 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
37383 op2 = expand_normal (arg1);
37384 if (!register_operand (op2, mode0))
37385 op2 = copy_to_mode_reg (mode0, op2);
37387 op3 = expand_normal (arg2);
37388 if (!register_operand (op3, mode0))
37389 op3 = copy_to_mode_reg (mode0, op3);
37391 op4 = expand_normal (arg3);
37392 if (!address_operand (op4, VOIDmode))
37394 op4 = convert_memory_address (Pmode, op4);
37395 op4 = copy_addr_to_reg (op4);
37398 op0 = gen_reg_rtx (mode0);
37399 if (integer_zerop (arg0))
37401 /* If arg0 is 0, optimize right away into an add or sub
37402 instruction that sets CCCmode flags.  */
37403 op1 = gen_rtx_REG (mode2, FLAGS_REG);
37404 emit_insn (GEN_FCN (icode2) (op0, op2, op3));
37408 /* Generate CF from input operand. */
37409 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
37411 /* Generate instruction that consumes CF. */
37412 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
37413 pat = gen_rtx_LTU (mode1, op1, const0_rtx);
37414 pat2 = gen_rtx_LTU (mode0, op1, const0_rtx);
37415 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2));
37418 /* Return current CF value. */
37420 target = gen_reg_rtx (QImode);
37422 pat = gen_rtx_LTU (QImode, op1, const0_rtx);
37423 emit_insn (gen_rtx_SET (target, pat));
37425 /* Store the result. */
37426 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
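/* Usage sketch (illustrative): for _addcarry_u32 (c_in, a, b, &sum)
   the code above first materializes CF from c_in -- adding 0xff to a
   QImode register sets CF exactly when c_in is nonzero -- then emits
   the flag-consuming adc (or sbb for the SBB builtins), returns the
   resulting CF through a setcc, and stores the sum through the
   pointer.  */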
37430 case IX86_BUILTIN_READ_FLAGS:
37431 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
37434 || target == NULL_RTX
37435 || !nonimmediate_operand (target, word_mode)
37436 || GET_MODE (target) != word_mode)
37437 target = gen_reg_rtx (word_mode);
37439 emit_insn (gen_pop (target));
37442 case IX86_BUILTIN_WRITE_FLAGS:
37444 arg0 = CALL_EXPR_ARG (exp, 0);
37445 op0 = expand_normal (arg0);
37446 if (!general_no_elim_operand (op0, word_mode))
37447 op0 = copy_to_mode_reg (word_mode, op0);
37449 emit_insn (gen_push (op0));
37450 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
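/* Usage sketch (illustrative):
     unsigned long long f = __builtin_ia32_readeflags_u64 ();
     __builtin_ia32_writeeflags_u64 (f);
   Both builtins round-trip EFLAGS through the stack: pushf then pop
   for reading, push then popf for writing.  */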
37453 case IX86_BUILTIN_KTESTC8:
37454 icode = CODE_FOR_ktestqi;
37458 case IX86_BUILTIN_KTESTZ8:
37459 icode = CODE_FOR_ktestqi;
37463 case IX86_BUILTIN_KTESTC16:
37464 icode = CODE_FOR_ktesthi;
37468 case IX86_BUILTIN_KTESTZ16:
37469 icode = CODE_FOR_ktesthi;
37473 case IX86_BUILTIN_KTESTC32:
37474 icode = CODE_FOR_ktestsi;
37478 case IX86_BUILTIN_KTESTZ32:
37479 icode = CODE_FOR_ktestsi;
37483 case IX86_BUILTIN_KTESTC64:
37484 icode = CODE_FOR_ktestdi;
37488 case IX86_BUILTIN_KTESTZ64:
37489 icode = CODE_FOR_ktestdi;
37493 case IX86_BUILTIN_KORTESTC8:
37494 icode = CODE_FOR_kortestqi;
37498 case IX86_BUILTIN_KORTESTZ8:
37499 icode = CODE_FOR_kortestqi;
37503 case IX86_BUILTIN_KORTESTC16:
37504 icode = CODE_FOR_kortesthi;
37508 case IX86_BUILTIN_KORTESTZ16:
37509 icode = CODE_FOR_kortesthi;
37513 case IX86_BUILTIN_KORTESTC32:
37514 icode = CODE_FOR_kortestsi;
37518 case IX86_BUILTIN_KORTESTZ32:
37519 icode = CODE_FOR_kortestsi;
37523 case IX86_BUILTIN_KORTESTC64:
37524 icode = CODE_FOR_kortestdi;
37528 case IX86_BUILTIN_KORTESTZ64:
37529 icode = CODE_FOR_kortestdi;
37533 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
37534 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
37535 op0 = expand_normal (arg0);
37536 op1 = expand_normal (arg1);
37538 mode0 = insn_data[icode].operand[0].mode;
37539 mode1 = insn_data[icode].operand[1].mode;
37541 if (GET_MODE (op0) != VOIDmode)
37542 op0 = force_reg (GET_MODE (op0), op0);
37544 op0 = gen_lowpart (mode0, op0);
37546 if (!insn_data[icode].operand[0].predicate (op0, mode0))
37547 op0 = copy_to_mode_reg (mode0, op0);
37549 if (GET_MODE (op1) != VOIDmode)
37550 op1 = force_reg (GET_MODE (op1), op1);
37552 op1 = gen_lowpart (mode1, op1);
37554 if (!insn_data[icode].operand[1].predicate (op1, mode1))
37555 op1 = copy_to_mode_reg (mode1, op1);
37557 target = gen_reg_rtx (QImode);
37559 /* Emit ktest/kortest.  */
37560 emit_insn (GEN_FCN (icode) (op0, op1));
37561 /* And use setcc to return result from flags. */
37562 ix86_expand_setcc (target, EQ,
37563 gen_rtx_REG (mode3, FLAGS_REG), const0_rtx);
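/* Illustrative note: _mm512_kortestz (a, b) returns 1 iff (a | b) is
   all zeros (the Z variants test ZF), while the C variants test CF,
   which kortest sets when (a | b) is all ones.  */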
37566 case IX86_BUILTIN_GATHERSIV2DF:
37567 icode = CODE_FOR_avx2_gathersiv2df;
37569 case IX86_BUILTIN_GATHERSIV4DF:
37570 icode = CODE_FOR_avx2_gathersiv4df;
37572 case IX86_BUILTIN_GATHERDIV2DF:
37573 icode = CODE_FOR_avx2_gatherdiv2df;
37575 case IX86_BUILTIN_GATHERDIV4DF:
37576 icode = CODE_FOR_avx2_gatherdiv4df;
37578 case IX86_BUILTIN_GATHERSIV4SF:
37579 icode = CODE_FOR_avx2_gathersiv4sf;
37581 case IX86_BUILTIN_GATHERSIV8SF:
37582 icode = CODE_FOR_avx2_gathersiv8sf;
37584 case IX86_BUILTIN_GATHERDIV4SF:
37585 icode = CODE_FOR_avx2_gatherdiv4sf;
37587 case IX86_BUILTIN_GATHERDIV8SF:
37588 icode = CODE_FOR_avx2_gatherdiv8sf;
37590 case IX86_BUILTIN_GATHERSIV2DI:
37591 icode = CODE_FOR_avx2_gathersiv2di;
37593 case IX86_BUILTIN_GATHERSIV4DI:
37594 icode = CODE_FOR_avx2_gathersiv4di;
37596 case IX86_BUILTIN_GATHERDIV2DI:
37597 icode = CODE_FOR_avx2_gatherdiv2di;
37599 case IX86_BUILTIN_GATHERDIV4DI:
37600 icode = CODE_FOR_avx2_gatherdiv4di;
37602 case IX86_BUILTIN_GATHERSIV4SI:
37603 icode = CODE_FOR_avx2_gathersiv4si;
37605 case IX86_BUILTIN_GATHERSIV8SI:
37606 icode = CODE_FOR_avx2_gathersiv8si;
37608 case IX86_BUILTIN_GATHERDIV4SI:
37609 icode = CODE_FOR_avx2_gatherdiv4si;
37611 case IX86_BUILTIN_GATHERDIV8SI:
37612 icode = CODE_FOR_avx2_gatherdiv8si;
37614 case IX86_BUILTIN_GATHERALTSIV4DF:
37615 icode = CODE_FOR_avx2_gathersiv4df;
37617 case IX86_BUILTIN_GATHERALTDIV8SF:
37618 icode = CODE_FOR_avx2_gatherdiv8sf;
37620 case IX86_BUILTIN_GATHERALTSIV4DI:
37621 icode = CODE_FOR_avx2_gathersiv4di;
37623 case IX86_BUILTIN_GATHERALTDIV8SI:
37624 icode = CODE_FOR_avx2_gatherdiv8si;
37626 case IX86_BUILTIN_GATHER3SIV16SF:
37627 icode = CODE_FOR_avx512f_gathersiv16sf;
37629 case IX86_BUILTIN_GATHER3SIV8DF:
37630 icode = CODE_FOR_avx512f_gathersiv8df;
37632 case IX86_BUILTIN_GATHER3DIV16SF:
37633 icode = CODE_FOR_avx512f_gatherdiv16sf;
37635 case IX86_BUILTIN_GATHER3DIV8DF:
37636 icode = CODE_FOR_avx512f_gatherdiv8df;
37638 case IX86_BUILTIN_GATHER3SIV16SI:
37639 icode = CODE_FOR_avx512f_gathersiv16si;
37641 case IX86_BUILTIN_GATHER3SIV8DI:
37642 icode = CODE_FOR_avx512f_gathersiv8di;
37644 case IX86_BUILTIN_GATHER3DIV16SI:
37645 icode = CODE_FOR_avx512f_gatherdiv16si;
37647 case IX86_BUILTIN_GATHER3DIV8DI:
37648 icode = CODE_FOR_avx512f_gatherdiv8di;
37650 case IX86_BUILTIN_GATHER3ALTSIV8DF:
37651 icode = CODE_FOR_avx512f_gathersiv8df;
37653 case IX86_BUILTIN_GATHER3ALTDIV16SF:
37654 icode = CODE_FOR_avx512f_gatherdiv16sf;
37656 case IX86_BUILTIN_GATHER3ALTSIV8DI:
37657 icode = CODE_FOR_avx512f_gathersiv8di;
37659 case IX86_BUILTIN_GATHER3ALTDIV16SI:
37660 icode = CODE_FOR_avx512f_gatherdiv16si;
37662 case IX86_BUILTIN_GATHER3SIV2DF:
37663 icode = CODE_FOR_avx512vl_gathersiv2df;
37665 case IX86_BUILTIN_GATHER3SIV4DF:
37666 icode = CODE_FOR_avx512vl_gathersiv4df;
37668 case IX86_BUILTIN_GATHER3DIV2DF:
37669 icode = CODE_FOR_avx512vl_gatherdiv2df;
37671 case IX86_BUILTIN_GATHER3DIV4DF:
37672 icode = CODE_FOR_avx512vl_gatherdiv4df;
37674 case IX86_BUILTIN_GATHER3SIV4SF:
37675 icode = CODE_FOR_avx512vl_gathersiv4sf;
37677 case IX86_BUILTIN_GATHER3SIV8SF:
37678 icode = CODE_FOR_avx512vl_gathersiv8sf;
37680 case IX86_BUILTIN_GATHER3DIV4SF:
37681 icode = CODE_FOR_avx512vl_gatherdiv4sf;
37683 case IX86_BUILTIN_GATHER3DIV8SF:
37684 icode = CODE_FOR_avx512vl_gatherdiv8sf;
37686 case IX86_BUILTIN_GATHER3SIV2DI:
37687 icode = CODE_FOR_avx512vl_gathersiv2di;
37689 case IX86_BUILTIN_GATHER3SIV4DI:
37690 icode = CODE_FOR_avx512vl_gathersiv4di;
37692 case IX86_BUILTIN_GATHER3DIV2DI:
37693 icode = CODE_FOR_avx512vl_gatherdiv2di;
37695 case IX86_BUILTIN_GATHER3DIV4DI:
37696 icode = CODE_FOR_avx512vl_gatherdiv4di;
37698 case IX86_BUILTIN_GATHER3SIV4SI:
37699 icode = CODE_FOR_avx512vl_gathersiv4si;
37701 case IX86_BUILTIN_GATHER3SIV8SI:
37702 icode = CODE_FOR_avx512vl_gathersiv8si;
37704 case IX86_BUILTIN_GATHER3DIV4SI:
37705 icode = CODE_FOR_avx512vl_gatherdiv4si;
37707 case IX86_BUILTIN_GATHER3DIV8SI:
37708 icode = CODE_FOR_avx512vl_gatherdiv8si;
37710 case IX86_BUILTIN_GATHER3ALTSIV4DF:
37711 icode = CODE_FOR_avx512vl_gathersiv4df;
37713 case IX86_BUILTIN_GATHER3ALTDIV8SF:
37714 icode = CODE_FOR_avx512vl_gatherdiv8sf;
37716 case IX86_BUILTIN_GATHER3ALTSIV4DI:
37717 icode = CODE_FOR_avx512vl_gathersiv4di;
37719 case IX86_BUILTIN_GATHER3ALTDIV8SI:
37720 icode = CODE_FOR_avx512vl_gatherdiv8si;
37722 case IX86_BUILTIN_SCATTERSIV16SF:
37723 icode = CODE_FOR_avx512f_scattersiv16sf;
37725 case IX86_BUILTIN_SCATTERSIV8DF:
37726 icode = CODE_FOR_avx512f_scattersiv8df;
37728 case IX86_BUILTIN_SCATTERDIV16SF:
37729 icode = CODE_FOR_avx512f_scatterdiv16sf;
37731 case IX86_BUILTIN_SCATTERDIV8DF:
37732 icode = CODE_FOR_avx512f_scatterdiv8df;
37734 case IX86_BUILTIN_SCATTERSIV16SI:
37735 icode = CODE_FOR_avx512f_scattersiv16si;
37737 case IX86_BUILTIN_SCATTERSIV8DI:
37738 icode = CODE_FOR_avx512f_scattersiv8di;
37740 case IX86_BUILTIN_SCATTERDIV16SI:
37741 icode = CODE_FOR_avx512f_scatterdiv16si;
37743 case IX86_BUILTIN_SCATTERDIV8DI:
37744 icode = CODE_FOR_avx512f_scatterdiv8di;
37746 case IX86_BUILTIN_SCATTERSIV8SF:
37747 icode = CODE_FOR_avx512vl_scattersiv8sf;
37749 case IX86_BUILTIN_SCATTERSIV4SF:
37750 icode = CODE_FOR_avx512vl_scattersiv4sf;
37752 case IX86_BUILTIN_SCATTERSIV4DF:
37753 icode = CODE_FOR_avx512vl_scattersiv4df;
37755 case IX86_BUILTIN_SCATTERSIV2DF:
37756 icode = CODE_FOR_avx512vl_scattersiv2df;
37758 case IX86_BUILTIN_SCATTERDIV8SF:
37759 icode = CODE_FOR_avx512vl_scatterdiv8sf;
37761 case IX86_BUILTIN_SCATTERDIV4SF:
37762 icode = CODE_FOR_avx512vl_scatterdiv4sf;
37764 case IX86_BUILTIN_SCATTERDIV4DF:
37765 icode = CODE_FOR_avx512vl_scatterdiv4df;
37767 case IX86_BUILTIN_SCATTERDIV2DF:
37768 icode = CODE_FOR_avx512vl_scatterdiv2df;
37770 case IX86_BUILTIN_SCATTERSIV8SI:
37771 icode = CODE_FOR_avx512vl_scattersiv8si;
37773 case IX86_BUILTIN_SCATTERSIV4SI:
37774 icode = CODE_FOR_avx512vl_scattersiv4si;
37776 case IX86_BUILTIN_SCATTERSIV4DI:
37777 icode = CODE_FOR_avx512vl_scattersiv4di;
37779 case IX86_BUILTIN_SCATTERSIV2DI:
37780 icode = CODE_FOR_avx512vl_scattersiv2di;
37782 case IX86_BUILTIN_SCATTERDIV8SI:
37783 icode = CODE_FOR_avx512vl_scatterdiv8si;
37785 case IX86_BUILTIN_SCATTERDIV4SI:
37786 icode = CODE_FOR_avx512vl_scatterdiv4si;
37788 case IX86_BUILTIN_SCATTERDIV4DI:
37789 icode = CODE_FOR_avx512vl_scatterdiv4di;
37791 case IX86_BUILTIN_SCATTERDIV2DI:
37792 icode = CODE_FOR_avx512vl_scatterdiv2di;
37794 case IX86_BUILTIN_GATHERPFDPD:
37795 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
37796 goto vec_prefetch_gen;
37797 case IX86_BUILTIN_SCATTERALTSIV8DF:
37798 icode = CODE_FOR_avx512f_scattersiv8df;
37800 case IX86_BUILTIN_SCATTERALTDIV16SF:
37801 icode = CODE_FOR_avx512f_scatterdiv16sf;
37803 case IX86_BUILTIN_SCATTERALTSIV8DI:
37804 icode = CODE_FOR_avx512f_scattersiv8di;
37806 case IX86_BUILTIN_SCATTERALTDIV16SI:
37807 icode = CODE_FOR_avx512f_scatterdiv16si;
37809 case IX86_BUILTIN_SCATTERALTSIV4DF:
37810 icode = CODE_FOR_avx512vl_scattersiv4df;
37812 case IX86_BUILTIN_SCATTERALTDIV8SF:
37813 icode = CODE_FOR_avx512vl_scatterdiv8sf;
37815 case IX86_BUILTIN_SCATTERALTSIV4DI:
37816 icode = CODE_FOR_avx512vl_scattersiv4di;
37818 case IX86_BUILTIN_SCATTERALTDIV8SI:
37819 icode = CODE_FOR_avx512vl_scatterdiv8si;
37821 case IX86_BUILTIN_SCATTERALTSIV2DF:
37822 icode = CODE_FOR_avx512vl_scattersiv2df;
37824 case IX86_BUILTIN_SCATTERALTDIV4SF:
37825 icode = CODE_FOR_avx512vl_scatterdiv4sf;
37827 case IX86_BUILTIN_SCATTERALTSIV2DI:
37828 icode = CODE_FOR_avx512vl_scattersiv2di;
37830 case IX86_BUILTIN_SCATTERALTDIV4SI:
37831 icode = CODE_FOR_avx512vl_scatterdiv4si;
37833 case IX86_BUILTIN_GATHERPFDPS:
37834 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
37835 goto vec_prefetch_gen;
37836 case IX86_BUILTIN_GATHERPFQPD:
37837 icode = CODE_FOR_avx512pf_gatherpfv8didf;
37838 goto vec_prefetch_gen;
37839 case IX86_BUILTIN_GATHERPFQPS:
37840 icode = CODE_FOR_avx512pf_gatherpfv8disf;
37841 goto vec_prefetch_gen;
37842 case IX86_BUILTIN_SCATTERPFDPD:
37843 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
37844 goto vec_prefetch_gen;
37845 case IX86_BUILTIN_SCATTERPFDPS:
37846 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
37847 goto vec_prefetch_gen;
37848 case IX86_BUILTIN_SCATTERPFQPD:
37849 icode = CODE_FOR_avx512pf_scatterpfv8didf;
37850 goto vec_prefetch_gen;
37851 case IX86_BUILTIN_SCATTERPFQPS:
37852 icode = CODE_FOR_avx512pf_scatterpfv8disf;
37853 goto vec_prefetch_gen;
37857 rtx (*gen) (rtx, rtx);
37859 arg0 = CALL_EXPR_ARG (exp, 0);
37860 arg1 = CALL_EXPR_ARG (exp, 1);
37861 arg2 = CALL_EXPR_ARG (exp, 2);
37862 arg3 = CALL_EXPR_ARG (exp, 3);
37863 arg4 = CALL_EXPR_ARG (exp, 4);
37864 op0 = expand_normal (arg0);
37865 op1 = expand_normal (arg1);
37866 op2 = expand_normal (arg2);
37867 op3 = expand_normal (arg3);
37868 op4 = expand_normal (arg4);
37869 /* Note the arg order is different from the operand order. */
37870 mode0 = insn_data[icode].operand[1].mode;
37871 mode2 = insn_data[icode].operand[3].mode;
37872 mode3 = insn_data[icode].operand[4].mode;
37873 mode4 = insn_data[icode].operand[5].mode;
37875 if (target == NULL_RTX
37876 || GET_MODE (target) != insn_data[icode].operand[0].mode
37877 || !insn_data[icode].operand[0].predicate (target,
37878 GET_MODE (target)))
37879 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
37881 subtarget = target;
37885 case IX86_BUILTIN_GATHER3ALTSIV8DF:
37886 case IX86_BUILTIN_GATHER3ALTSIV8DI:
37887 half = gen_reg_rtx (V8SImode);
37888 if (!nonimmediate_operand (op2, V16SImode))
37889 op2 = copy_to_mode_reg (V16SImode, op2);
37890 emit_insn (gen_vec_extract_lo_v16si (half, op2));
37893 case IX86_BUILTIN_GATHER3ALTSIV4DF:
37894 case IX86_BUILTIN_GATHER3ALTSIV4DI:
37895 case IX86_BUILTIN_GATHERALTSIV4DF:
37896 case IX86_BUILTIN_GATHERALTSIV4DI:
37897 half = gen_reg_rtx (V4SImode);
37898 if (!nonimmediate_operand (op2, V8SImode))
37899 op2 = copy_to_mode_reg (V8SImode, op2);
37900 emit_insn (gen_vec_extract_lo_v8si (half, op2));
37903 case IX86_BUILTIN_GATHER3ALTDIV16SF:
37904 case IX86_BUILTIN_GATHER3ALTDIV16SI:
37905 half = gen_reg_rtx (mode0);
37906 if (mode0 == V8SFmode)
37907 gen = gen_vec_extract_lo_v16sf;
37909 gen = gen_vec_extract_lo_v16si;
37910 if (!nonimmediate_operand (op0, GET_MODE (op0)))
37911 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
37912 emit_insn (gen (half, op0));
37914 op3 = lowpart_subreg (QImode, op3, HImode);
37916 case IX86_BUILTIN_GATHER3ALTDIV8SF:
37917 case IX86_BUILTIN_GATHER3ALTDIV8SI:
37918 case IX86_BUILTIN_GATHERALTDIV8SF:
37919 case IX86_BUILTIN_GATHERALTDIV8SI:
37920 half = gen_reg_rtx (mode0);
37921 if (mode0 == V4SFmode)
37922 gen = gen_vec_extract_lo_v8sf;
37924 gen = gen_vec_extract_lo_v8si;
37925 if (!nonimmediate_operand (op0, GET_MODE (op0)))
37926 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
37927 emit_insn (gen (half, op0));
37929 if (VECTOR_MODE_P (GET_MODE (op3)))
37931 half = gen_reg_rtx (mode0);
37932 if (!nonimmediate_operand (op3, GET_MODE (op3)))
37933 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
37934 emit_insn (gen (half, op3));
37942 /* Force memory operand only with base register here.  But we
37943 don't want to do it on memory operand for other builtin
37944 functions.  */
37945 op1 = ix86_zero_extend_to_Pmode (op1);
37947 if (!insn_data[icode].operand[1].predicate (op0, mode0))
37948 op0 = copy_to_mode_reg (mode0, op0);
37949 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
37950 op1 = copy_to_mode_reg (Pmode, op1);
37951 if (!insn_data[icode].operand[3].predicate (op2, mode2))
37952 op2 = copy_to_mode_reg (mode2, op2);
37954 op3 = fixup_modeless_constant (op3, mode3);
37956 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
37958 if (!insn_data[icode].operand[4].predicate (op3, mode3))
37959 op3 = copy_to_mode_reg (mode3, op3);
37963 op3 = copy_to_reg (op3);
37964 op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
37966 if (!insn_data[icode].operand[5].predicate (op4, mode4))
37968 error ("the last argument must be scale 1, 2, 4, 8");
37972 /* Optimize. If mask is known to have all high bits set,
37973 replace op0 with pc_rtx to signal that the instruction
37974 overwrites the whole destination and doesn't use its
37975 previous contents. */
37978 if (TREE_CODE (arg3) == INTEGER_CST)
37980 if (integer_all_onesp (arg3))
37983 else if (TREE_CODE (arg3) == VECTOR_CST)
37985 unsigned int negative = 0;
37986 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
37988 tree cst = VECTOR_CST_ELT (arg3, i);
37989 if (TREE_CODE (cst) == INTEGER_CST
37990 && tree_int_cst_sign_bit (cst))
37992 else if (TREE_CODE (cst) == REAL_CST
37993 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
37996 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
37999 else if (TREE_CODE (arg3) == SSA_NAME
38000 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
38002 /* Recognize also when mask is like:
38003 __v2df src = _mm_setzero_pd ();
38004 __v2df mask = _mm_cmpeq_pd (src, src);
38006 __v8sf src = _mm256_setzero_ps ();
38007 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
38008 as that is a cheaper way to load all ones into
38009 a register than having to load a constant from
38010 memory.  */
38011 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
38012 if (is_gimple_call (def_stmt))
38014 tree fndecl = gimple_call_fndecl (def_stmt);
38016 && fndecl_built_in_p (fndecl, BUILT_IN_MD))
38017 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
38019 case IX86_BUILTIN_CMPPD:
38020 case IX86_BUILTIN_CMPPS:
38021 case IX86_BUILTIN_CMPPD256:
38022 case IX86_BUILTIN_CMPPS256:
38023 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
38026 case IX86_BUILTIN_CMPEQPD:
38027 case IX86_BUILTIN_CMPEQPS:
38028 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
38029 && initializer_zerop (gimple_call_arg (def_stmt,
38040 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
38047 case IX86_BUILTIN_GATHER3DIV16SF:
38048 if (target == NULL_RTX)
38049 target = gen_reg_rtx (V8SFmode);
38050 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
38052 case IX86_BUILTIN_GATHER3DIV16SI:
38053 if (target == NULL_RTX)
38054 target = gen_reg_rtx (V8SImode);
38055 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
38057 case IX86_BUILTIN_GATHER3DIV8SF:
38058 case IX86_BUILTIN_GATHERDIV8SF:
38059 if (target == NULL_RTX)
38060 target = gen_reg_rtx (V4SFmode);
38061 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
38063 case IX86_BUILTIN_GATHER3DIV8SI:
38064 case IX86_BUILTIN_GATHERDIV8SI:
38065 if (target == NULL_RTX)
38066 target = gen_reg_rtx (V4SImode);
38067 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
38070 target = subtarget;
38076 arg0 = CALL_EXPR_ARG (exp, 0);
38077 arg1 = CALL_EXPR_ARG (exp, 1);
38078 arg2 = CALL_EXPR_ARG (exp, 2);
38079 arg3 = CALL_EXPR_ARG (exp, 3);
38080 arg4 = CALL_EXPR_ARG (exp, 4);
38081 op0 = expand_normal (arg0);
38082 op1 = expand_normal (arg1);
38083 op2 = expand_normal (arg2);
38084 op3 = expand_normal (arg3);
38085 op4 = expand_normal (arg4);
38086 mode1 = insn_data[icode].operand[1].mode;
38087 mode2 = insn_data[icode].operand[2].mode;
38088 mode3 = insn_data[icode].operand[3].mode;
38089 mode4 = insn_data[icode].operand[4].mode;
38091 /* Scatter instruction stores operand op3 to memory with
38092 indices from op2 and scale from op4 under writemask op1.
38093 If index operand op2 has more elements than source operand
38094 op3, only its low half is used, and vice versa.  */
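/* E.g. IX86_BUILTIN_SCATTERALTSIV8DF below stores eight DF elements
   but carries a V16SI index, so only the low V8SI half of the index
   is extracted.  */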
38097 case IX86_BUILTIN_SCATTERALTSIV8DF:
38098 case IX86_BUILTIN_SCATTERALTSIV8DI:
38099 half = gen_reg_rtx (V8SImode);
38100 if (!nonimmediate_operand (op2, V16SImode))
38101 op2 = copy_to_mode_reg (V16SImode, op2);
38102 emit_insn (gen_vec_extract_lo_v16si (half, op2));
38105 case IX86_BUILTIN_SCATTERALTDIV16SF:
38106 case IX86_BUILTIN_SCATTERALTDIV16SI:
38107 half = gen_reg_rtx (mode3);
38108 if (mode3 == V8SFmode)
38109 gen = gen_vec_extract_lo_v16sf;
38111 gen = gen_vec_extract_lo_v16si;
38112 if (!nonimmediate_operand (op3, GET_MODE (op3)))
38113 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38114 emit_insn (gen (half, op3));
38117 case IX86_BUILTIN_SCATTERALTSIV4DF:
38118 case IX86_BUILTIN_SCATTERALTSIV4DI:
38119 half = gen_reg_rtx (V4SImode);
38120 if (!nonimmediate_operand (op2, V8SImode))
38121 op2 = copy_to_mode_reg (V8SImode, op2);
38122 emit_insn (gen_vec_extract_lo_v8si (half, op2));
38125 case IX86_BUILTIN_SCATTERALTDIV8SF:
38126 case IX86_BUILTIN_SCATTERALTDIV8SI:
38127 half = gen_reg_rtx (mode3);
38128 if (mode3 == V4SFmode)
38129 gen = gen_vec_extract_lo_v8sf;
38131 gen = gen_vec_extract_lo_v8si;
38132 if (!nonimmediate_operand (op3, GET_MODE (op3)))
38133 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38134 emit_insn (gen (half, op3));
38137 case IX86_BUILTIN_SCATTERALTSIV2DF:
38138 case IX86_BUILTIN_SCATTERALTSIV2DI:
38139 if (!nonimmediate_operand (op2, V4SImode))
38140 op2 = copy_to_mode_reg (V4SImode, op2);
38142 case IX86_BUILTIN_SCATTERALTDIV4SF:
38143 case IX86_BUILTIN_SCATTERALTDIV4SI:
38144 if (!nonimmediate_operand (op3, GET_MODE (op3)))
38145 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38151 /* Force memory operand only with base register here.  But we
38152 don't want to do it on memory operand for other builtin
38153 functions.  */
38154 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
38156 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38157 op0 = copy_to_mode_reg (Pmode, op0);
38159 op1 = fixup_modeless_constant (op1, mode1);
38161 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
38163 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38164 op1 = copy_to_mode_reg (mode1, op1);
38168 op1 = copy_to_reg (op1);
38169 op1 = lowpart_subreg (mode1, op1, GET_MODE (op1));
38172 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38173 op2 = copy_to_mode_reg (mode2, op2);
38175 if (!insn_data[icode].operand[3].predicate (op3, mode3))
38176 op3 = copy_to_mode_reg (mode3, op3);
38178 if (!insn_data[icode].operand[4].predicate (op4, mode4))
38180 error ("the last argument must be scale 1, 2, 4, 8");
38184 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
38192 arg0 = CALL_EXPR_ARG (exp, 0);
38193 arg1 = CALL_EXPR_ARG (exp, 1);
38194 arg2 = CALL_EXPR_ARG (exp, 2);
38195 arg3 = CALL_EXPR_ARG (exp, 3);
38196 arg4 = CALL_EXPR_ARG (exp, 4);
38197 op0 = expand_normal (arg0);
38198 op1 = expand_normal (arg1);
38199 op2 = expand_normal (arg2);
38200 op3 = expand_normal (arg3);
38201 op4 = expand_normal (arg4);
38202 mode0 = insn_data[icode].operand[0].mode;
38203 mode1 = insn_data[icode].operand[1].mode;
38204 mode3 = insn_data[icode].operand[3].mode;
38205 mode4 = insn_data[icode].operand[4].mode;
38207 op0 = fixup_modeless_constant (op0, mode0);
38209 if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
38211 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38212 op0 = copy_to_mode_reg (mode0, op0);
38216 op0 = copy_to_reg (op0);
38217 op0 = lowpart_subreg (mode0, op0, GET_MODE (op0));
38220 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38221 op1 = copy_to_mode_reg (mode1, op1);
38223 /* Force memory operand only with base register here.  But we
38224 don't want to do it on memory operand for other builtin
38225 functions.  */
38226 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
38228 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
38229 op2 = copy_to_mode_reg (Pmode, op2);
38231 if (!insn_data[icode].operand[3].predicate (op3, mode3))
38233 error ("the fourth argument must be scale 1, 2, 4, 8");
38237 if (!insn_data[icode].operand[4].predicate (op4, mode4))
38239 error ("incorrect hint operand");
38243 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
38251 case IX86_BUILTIN_XABORT:
38252 icode = CODE_FOR_xabort;
38253 arg0 = CALL_EXPR_ARG (exp, 0);
38254 op0 = expand_normal (arg0);
38255 mode0 = insn_data[icode].operand[0].mode;
38256 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38258 error ("the xabort's argument must be an 8-bit immediate");
38261 emit_insn (gen_xabort (op0));
38264 case IX86_BUILTIN_RSTORSSP:
38265 case IX86_BUILTIN_CLRSSBSY:
38266 arg0 = CALL_EXPR_ARG (exp, 0);
38267 op0 = expand_normal (arg0);
38268 icode = (fcode == IX86_BUILTIN_RSTORSSP
38269 ? CODE_FOR_rstorssp
38270 : CODE_FOR_clrssbsy);
38271 if (!address_operand (op0, VOIDmode))
38273 op1 = convert_memory_address (Pmode, op0);
38274 op0 = copy_addr_to_reg (op1);
38276 emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0)));
38279 case IX86_BUILTIN_WRSSD:
38280 case IX86_BUILTIN_WRSSQ:
38281 case IX86_BUILTIN_WRUSSD:
38282 case IX86_BUILTIN_WRUSSQ:
38283 arg0 = CALL_EXPR_ARG (exp, 0);
38284 op0 = expand_normal (arg0);
38285 arg1 = CALL_EXPR_ARG (exp, 1);
38286 op1 = expand_normal (arg1);
38289 case IX86_BUILTIN_WRSSD:
38290 icode = CODE_FOR_wrsssi;
38293 case IX86_BUILTIN_WRSSQ:
38294 icode = CODE_FOR_wrssdi;
38297 case IX86_BUILTIN_WRUSSD:
38298 icode = CODE_FOR_wrusssi;
38301 case IX86_BUILTIN_WRUSSQ:
38302 icode = CODE_FOR_wrussdi;
38306 op0 = force_reg (mode, op0);
38307 if (!address_operand (op1, VOIDmode))
38309 op2 = convert_memory_address (Pmode, op1);
38310 op1 = copy_addr_to_reg (op2);
38312 emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1)));
38319 if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST
38320 && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST)
38322 i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST;
38323 return ix86_expand_special_args_builtin (bdesc_special_args + i, exp,
38327 if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST
38328 && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST)
38330 i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST;
38331 return ix86_expand_special_args_builtin (bdesc_special_args2 + i, exp,
38335 if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST
38336 && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST)
38338 i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST;
38341 case IX86_BUILTIN_FABSQ:
38342 case IX86_BUILTIN_COPYSIGNQ:
38344 /* Emit a normal call if SSE isn't available. */
38345 return expand_call (exp, target, ignore);
38348 return ix86_expand_args_builtin (bdesc_args + i, exp, target);
38352 if (fcode >= IX86_BUILTIN__BDESC_ARGS2_FIRST
38353 && fcode <= IX86_BUILTIN__BDESC_ARGS2_LAST)
38355 i = fcode - IX86_BUILTIN__BDESC_ARGS2_FIRST;
38356 rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL;
38357 rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx);
38358 rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx);
38360 machine_mode mode, wide_mode, nar_mode;
38362 nar_mode = V4SFmode;
38364 wide_mode = V64SFmode;
38365 fcn_mask = gen_avx5124fmaddps_4fmaddps_mask;
38366 fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz;
38370 case IX86_BUILTIN_4FMAPS:
38371 fcn = gen_avx5124fmaddps_4fmaddps;
38375 case IX86_BUILTIN_4DPWSSD:
38376 nar_mode = V4SImode;
38378 wide_mode = V64SImode;
38379 fcn = gen_avx5124vnniw_vp4dpwssd;
38383 case IX86_BUILTIN_4DPWSSDS:
38384 nar_mode = V4SImode;
38386 wide_mode = V64SImode;
38387 fcn = gen_avx5124vnniw_vp4dpwssds;
38391 case IX86_BUILTIN_4FNMAPS:
38392 fcn = gen_avx5124fmaddps_4fnmaddps;
38396 case IX86_BUILTIN_4FNMAPS_MASK:
38397 fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask;
38398 fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz;
38401 case IX86_BUILTIN_4DPWSSD_MASK:
38402 nar_mode = V4SImode;
38404 wide_mode = V64SImode;
38405 fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask;
38406 fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz;
38409 case IX86_BUILTIN_4DPWSSDS_MASK:
38410 nar_mode = V4SImode;
38412 wide_mode = V64SImode;
38413 fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask;
38414 fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz;
38417 case IX86_BUILTIN_4FMAPS_MASK:
38427 wide_reg = gen_reg_rtx (wide_mode);
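/* Illustrative note: the four vector arguments are packed into one
   wide pseudo (V64SF or V64SI) because the 4fmaps/4dpwssd patterns
   operate on a group of four consecutive registers presented as a
   single operand.  */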
38428 for (i = 0; i < 4; i++)
38430 args[i] = CALL_EXPR_ARG (exp, i);
38431 ops[i] = expand_normal (args[i]);
38433 emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64),
38437 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
38438 accum = force_reg (mode, accum);
38440 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
38441 addr = force_reg (Pmode, addr);
38443 mem = gen_rtx_MEM (nar_mode, addr);
38445 target = gen_reg_rtx (mode);
38447 emit_move_insn (target, accum);
38450 emit_insn (fcn (target, accum, wide_reg, mem));
38454 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
38456 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
38458 if (CONST_INT_P (mask))
38459 mask = fixup_modeless_constant (mask, HImode);
38461 mask = force_reg (HImode, mask);
38463 if (GET_MODE (mask) != HImode)
38464 mask = gen_rtx_SUBREG (HImode, mask, 0);
38466 /* If merge is 0 then we're about to emit z-masked variant. */
38467 if (const0_operand (merge, mode))
38468 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
38469 /* If merge is the same as accum then emit merge-masked variant. */
38470 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
38472 merge = force_reg (mode, merge);
38473 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
38475 /* Merge with something unknown might happen if we z-mask w/ -O0. */
38478 target = gen_reg_rtx (mode);
38479 emit_move_insn (target, merge);
38480 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
38486 case IX86_BUILTIN_4FNMASS:
38487 fcn = gen_avx5124fmaddps_4fnmaddss;
38491 case IX86_BUILTIN_4FMASS:
38492 fcn = gen_avx5124fmaddps_4fmaddss;
38496 case IX86_BUILTIN_4FNMASS_MASK:
38497 fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask;
38498 fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz;
38501 case IX86_BUILTIN_4FMASS_MASK:
38510 fcn_mask = gen_avx5124fmaddps_4fmaddss_mask;
38511 fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz;
38515 wide_reg = gen_reg_rtx (V64SFmode);
38516 for (i = 0; i < 4; i++)
38519 args[i] = CALL_EXPR_ARG (exp, i);
38520 ops[i] = expand_normal (args[i]);
38522 tmp = gen_reg_rtx (SFmode);
38523 emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0));
38525 emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64),
38526 gen_rtx_SUBREG (V16SFmode, tmp, 0));
38529 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
38530 accum = force_reg (V4SFmode, accum);
38532 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
38533 addr = force_reg (Pmode, addr);
38535 mem = gen_rtx_MEM (V4SFmode, addr);
38537 target = gen_reg_rtx (V4SFmode);
38539 emit_move_insn (target, accum);
38542 emit_insn (fcn (target, accum, wide_reg, mem));
38546 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
38548 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
38550 if (CONST_INT_P (mask))
38551 mask = fixup_modeless_constant (mask, QImode);
38553 mask = force_reg (QImode, mask);
38555 if (GET_MODE (mask) != QImode)
38556 mask = gen_rtx_SUBREG (QImode, mask, 0);
38558 /* If merge is 0 then we're about to emit z-masked variant. */
38559 if (const0_operand (merge, mode))
38560 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
38561 /* If merge is the same as accum then emit merge-masked
38562 variant.  */
38563 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
38565 merge = force_reg (mode, merge);
38566 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
38568 /* Merge with something unknown might happen if we z-mask
38569 w/ -O0.  */
38572 target = gen_reg_rtx (mode);
38573 emit_move_insn (target, merge);
38574 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
38579 case IX86_BUILTIN_RDPID:
38580 return ix86_expand_special_args_builtin (bdesc_args2 + i, exp,
38583 return ix86_expand_args_builtin (bdesc_args2 + i, exp, target);
38587 if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST
38588 && fcode <= IX86_BUILTIN__BDESC_COMI_LAST)
38590 i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST;
38591 return ix86_expand_sse_comi (bdesc_comi + i, exp, target);
38594 if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
38595 && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST)
38597 i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST;
38598 return ix86_expand_round_builtin (bdesc_round_args + i, exp, target);
38601 if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST
38602 && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST)
38604 i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST;
38605 return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target);
38608 if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST
38609 && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST)
38611 i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST;
38612 return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target);
38615 if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST
38616 && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST)
38618 i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST;
38619 const struct builtin_description *d = bdesc_multi_arg + i;
38620 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
38621 (enum ix86_builtin_func_type)
38622 d->flag, d->comparison);
38625 if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST
38626 && fcode <= IX86_BUILTIN__BDESC_CET_LAST)
38628 i = fcode - IX86_BUILTIN__BDESC_CET_FIRST;
38629 return ix86_expand_special_args_builtin (bdesc_cet + i, exp,
38633 if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST
38634 && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST)
38636 i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST;
38637 return ix86_expand_special_args_builtin (bdesc_cet_rdssp + i, exp,
38641 gcc_unreachable ();
38644 /* This returns the target-specific builtin with code CODE if
38645 current_function_decl has visibility on this builtin, which is checked
38646 using isa flags. Returns NULL_TREE otherwise. */
38648 static tree ix86_get_builtin (enum ix86_builtins code)
38650 struct cl_target_option *opts;
38651 tree target_tree = NULL_TREE;
38653 /* Determine the isa flags of current_function_decl. */
38655 if (current_function_decl)
38656 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
38658 if (target_tree == NULL)
38659 target_tree = target_option_default_node;
38661 opts = TREE_TARGET_OPTION (target_tree);
38663 if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
38664 || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2))
38665 return ix86_builtin_decl (code, true);
38670 /* Returns a function decl for a vectorized version of the combined function
38671 with combined_fn code FN and the result vector type TYPE, or NULL_TREE
38672 if it is not available. */
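/* For example (illustrative): a floorf loop vectorized with V4SF
   input and output, with SSE4.1 enabled and -fno-trapping-math,
   resolves to IX86_BUILTIN_FLOORPS below, allowing a single roundps
   instruction.  */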
38675 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
38678 machine_mode in_mode, out_mode;
38681 if (TREE_CODE (type_out) != VECTOR_TYPE
38682 || TREE_CODE (type_in) != VECTOR_TYPE)
38685 out_mode = TYPE_MODE (TREE_TYPE (type_out));
38686 out_n = TYPE_VECTOR_SUBPARTS (type_out);
38687 in_mode = TYPE_MODE (TREE_TYPE (type_in));
38688 in_n = TYPE_VECTOR_SUBPARTS (type_in);
38693 if (out_mode == SFmode && in_mode == SFmode)
38695 if (out_n == 16 && in_n == 16)
38696 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
38703 /* The round insn does not trap on denormals. */
38704 if (flag_trapping_math || !TARGET_SSE4_1)
38707 if (out_mode == SImode && in_mode == DFmode)
38709 if (out_n == 4 && in_n == 2)
38710 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
38711 else if (out_n == 8 && in_n == 4)
38712 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
38713 else if (out_n == 16 && in_n == 8)
38714 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
38716 if (out_mode == SImode && in_mode == SFmode)
38718 if (out_n == 4 && in_n == 4)
38719 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
38720 else if (out_n == 8 && in_n == 8)
38721 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
38722 else if (out_n == 16 && in_n == 16)
38723 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512);
38730 /* The round insn does not trap on denormals. */
38731 if (flag_trapping_math || !TARGET_SSE4_1)
38734 if (out_mode == SImode && in_mode == DFmode)
38736 if (out_n == 4 && in_n == 2)
38737 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
38738 else if (out_n == 8 && in_n == 4)
38739 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
38740 else if (out_n == 16 && in_n == 8)
38741 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
38743 if (out_mode == SImode && in_mode == SFmode)
38745 if (out_n == 4 && in_n == 4)
38746 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
38747 else if (out_n == 8 && in_n == 8)
38748 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
38749 else if (out_n == 16 && in_n == 16)
38750 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512);
38757 if (out_mode == SImode && in_mode == DFmode)
38759 if (out_n == 4 && in_n == 2)
38760 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
38761 else if (out_n == 8 && in_n == 4)
38762 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
38763 else if (out_n == 16 && in_n == 8)
38764 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512);
38766 if (out_mode == SImode && in_mode == SFmode)
38768 if (out_n == 4 && in_n == 4)
38769 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
38770 else if (out_n == 8 && in_n == 8)
38771 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
38772 else if (out_n == 16 && in_n == 16)
38773 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512);
38780 /* The round insn does not trap on denormals. */
38781 if (flag_trapping_math || !TARGET_SSE4_1)
38784 if (out_mode == SImode && in_mode == DFmode)
38786 if (out_n == 4 && in_n == 2)
38787 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
38788 else if (out_n == 8 && in_n == 4)
38789 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
38790 else if (out_n == 16 && in_n == 8)
38791 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
38793 if (out_mode == SImode && in_mode == SFmode)
38795 if (out_n == 4 && in_n == 4)
38796 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
38797 else if (out_n == 8 && in_n == 8)
38798 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
38799 else if (out_n == 16 && in_n == 16)
38800 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512);
38805 /* The round insn does not trap on denormals. */
38806 if (flag_trapping_math || !TARGET_SSE4_1)
38809 if (out_mode == DFmode && in_mode == DFmode)
38811 if (out_n == 2 && in_n == 2)
38812 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
38813 else if (out_n == 4 && in_n == 4)
38814 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
38815 else if (out_n == 8 && in_n == 8)
38816 return ix86_get_builtin (IX86_BUILTIN_FLOORPD512);
38818 if (out_mode == SFmode && in_mode == SFmode)
38820 if (out_n == 4 && in_n == 4)
38821 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
38822 else if (out_n == 8 && in_n == 8)
38823 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
38824 else if (out_n == 16 && in_n == 16)
38825 return ix86_get_builtin (IX86_BUILTIN_FLOORPS512);
38830 /* The round insn does not trap on denormals. */
38831 if (flag_trapping_math || !TARGET_SSE4_1)
38834 if (out_mode == DFmode && in_mode == DFmode)
38836 if (out_n == 2 && in_n == 2)
38837 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
38838 else if (out_n == 4 && in_n == 4)
38839 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
38840 else if (out_n == 8 && in_n == 8)
38841 return ix86_get_builtin (IX86_BUILTIN_CEILPD512);
38843 if (out_mode == SFmode && in_mode == SFmode)
38845 if (out_n == 4 && in_n == 4)
38846 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
38847 else if (out_n == 8 && in_n == 8)
38848 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
38849 else if (out_n == 16 && in_n == 16)
38850 return ix86_get_builtin (IX86_BUILTIN_CEILPS512);
38855 /* The round insn does not trap on denormals. */
38856 if (flag_trapping_math || !TARGET_SSE4_1)
38859 if (out_mode == DFmode && in_mode == DFmode)
38861 if (out_n == 2 && in_n == 2)
38862 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
38863 else if (out_n == 4 && in_n == 4)
38864 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
38865 else if (out_n == 8 && in_n == 8)
38866 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512);
38868 if (out_mode == SFmode && in_mode == SFmode)
38870 if (out_n == 4 && in_n == 4)
38871 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
38872 else if (out_n == 8 && in_n == 8)
38873 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
38874 else if (out_n == 16 && in_n == 16)
38875 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512);
38880 /* The round insn does not trap on denormals. */
38881 if (flag_trapping_math || !TARGET_SSE4_1)
38884 if (out_mode == DFmode && in_mode == DFmode)
38886 if (out_n == 2 && in_n == 2)
38887 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
38888 else if (out_n == 4 && in_n == 4)
38889 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
38891 if (out_mode == SFmode && in_mode == SFmode)
38893 if (out_n == 4 && in_n == 4)
38894 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
38895 else if (out_n == 8 && in_n == 8)
38896 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
38901 if (out_mode == DFmode && in_mode == DFmode)
38903 if (out_n == 2 && in_n == 2)
38904 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
38905 if (out_n == 4 && in_n == 4)
38906 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
38908 if (out_mode == SFmode && in_mode == SFmode)
38910 if (out_n == 4 && in_n == 4)
38911 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
38912 if (out_n == 8 && in_n == 8)
38913 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
38921 /* Dispatch to a handler for a vectorization library. */
38922 if (ix86_veclib_handler)
38923 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
38928 /* Handler for an SVML-style interface to
38929 a library with vectorized intrinsics. */
38932 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
38935 tree fntype, new_fndecl, args;
38938 machine_mode el_mode, in_mode;
38941 /* The SVML is suitable for unsafe math only. */
38942 if (!flag_unsafe_math_optimizations)
38945 el_mode = TYPE_MODE (TREE_TYPE (type_out));
38946 n = TYPE_VECTOR_SUBPARTS (type_out);
38947 in_mode = TYPE_MODE (TREE_TYPE (type_in));
38948 in_n = TYPE_VECTOR_SUBPARTS (type_in);
38949 if (el_mode != in_mode
38973 if ((el_mode != DFmode || n != 2)
38974 && (el_mode != SFmode || n != 4))
38982 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
38983 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
38985 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
38986 strcpy (name, "vmlsLn4");
38987 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
38988 strcpy (name, "vmldLn2");
38991 sprintf (name, "vmls%s", bname+10);
38992 name[strlen (name)-1] = '4';
38995 sprintf (name, "vmld%s2", bname+10);
38997 /* Convert to uppercase. */
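/* E.g. (following the mangling above) logf on 4 floats becomes
   vmlsLn4, log on 2 doubles becomes vmldLn2, and sinf becomes
   vmlsSin4.  */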
39001 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
39005 fntype = build_function_type_list (type_out, type_in, NULL);
39007 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
39009 /* Build a function declaration for the vectorized function. */
39010 new_fndecl = build_decl (BUILTINS_LOCATION,
39011 FUNCTION_DECL, get_identifier (name), fntype);
39012 TREE_PUBLIC (new_fndecl) = 1;
39013 DECL_EXTERNAL (new_fndecl) = 1;
39014 DECL_IS_NOVOPS (new_fndecl) = 1;
39015 TREE_READONLY (new_fndecl) = 1;
39020 /* Handler for an ACML-style interface to
39021 a library with vectorized intrinsics. */
39024 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
39026 char name[20] = "__vr.._";
39027 tree fntype, new_fndecl, args;
39030 machine_mode el_mode, in_mode;
39033 /* The ACML library is 64-bit only and suitable for unsafe math
39034 only, as it does not correctly support parts of IEEE (such as
39035 denormals) with the required precision.  */
39037 || !flag_unsafe_math_optimizations)
39040 el_mode = TYPE_MODE (TREE_TYPE (type_out));
39041 n = TYPE_VECTOR_SUBPARTS (type_out);
39042 in_mode = TYPE_MODE (TREE_TYPE (type_in));
39043 in_n = TYPE_VECTOR_SUBPARTS (type_in);
39044 if (el_mode != in_mode
39056 if (el_mode == DFmode && n == 2)
39061 else if (el_mode == SFmode && n == 4)
39074 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
39075 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
39076 sprintf (name + 7, "%s", bname+10);
39079 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
39083 fntype = build_function_type_list (type_out, type_in, NULL);
39085 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
39087 /* Build a function declaration for the vectorized function. */
39088 new_fndecl = build_decl (BUILTINS_LOCATION,
39089 FUNCTION_DECL, get_identifier (name), fntype);
39090 TREE_PUBLIC (new_fndecl) = 1;
39091 DECL_EXTERNAL (new_fndecl) = 1;
39092 DECL_IS_NOVOPS (new_fndecl) = 1;
TREE_READONLY (new_fndecl) = 1;

return new_fndecl;
}
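/* A worked example of the ACML mangling above: the template is
   "__vr.._", with name[4..5] filled from the mode (assumed 'd'/'2'
   for V2DF and 's'/'4' for V4SF in the elided branches) and the
   scalar builtin name appended after the underscore, so BUILT_IN_SIN
   on V2DF maps to "__vrd2_sin" and BUILT_IN_SINF on V4SF to
   "__vrs4_sinf".  */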
39098 /* Returns a decl of a function that implements gather load with
39099 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
39100 Return NULL_TREE if it is not available. */
39103 ix86_vectorize_builtin_gather (const_tree mem_vectype,
39104 const_tree index_type, int scale)
39107 enum ix86_builtins code;
if (!TARGET_AVX2 || !TARGET_USE_GATHER)
  return NULL_TREE;
39112 if ((TREE_CODE (index_type) != INTEGER_TYPE
39113 && !POINTER_TYPE_P (index_type))
39114 || (TYPE_MODE (index_type) != SImode
39115 && TYPE_MODE (index_type) != DImode))
39118 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
39121 /* v*gather* insn sign extends index to pointer mode. */
39122 if (TYPE_PRECISION (index_type) < POINTER_SIZE
39123 && TYPE_UNSIGNED (index_type))
39128 || (scale & (scale - 1)) != 0)
39131 si = TYPE_MODE (index_type) == SImode;
39132 switch (TYPE_MODE (mem_vectype))
39135 if (TARGET_AVX512VL)
39136 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
39138 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
39141 if (TARGET_AVX512VL)
39142 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
39144 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
39147 if (TARGET_AVX512VL)
39148 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
39150 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
39153 if (TARGET_AVX512VL)
39154 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
39156 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
39159 if (TARGET_AVX512VL)
39160 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
39162 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
39165 if (TARGET_AVX512VL)
39166 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
39168 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
39171 if (TARGET_AVX512VL)
39172 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
39174 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
39177 if (TARGET_AVX512VL)
39178 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
39180 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
39183 if (TARGET_AVX512F)
39184 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
39189 if (TARGET_AVX512F)
39190 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
39195 if (TARGET_AVX512F)
39196 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
39201 if (TARGET_AVX512F)
39202 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
    else
      return NULL_TREE;
    break;

  default:
    return NULL_TREE;
  }

return ix86_get_builtin (code);
}
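/* The SCALE checks above (and in the scatter case below) accept only
   1, 2, 4 and 8, using the usual power-of-two idiom.  A minimal
   standalone sketch of that test, for illustration only (the helper
   name is hypothetical, not part of GCC):  */
#if 0
static bool
valid_gather_scale_p (int scale)
{
  /* Reject nonpositive and too-large values, then any value with
     more than one bit set.  */
  return scale >= 1 && scale <= 8 && (scale & (scale - 1)) == 0;
}
#endif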
39213 /* Returns a decl of a function that implements scatter store with
39214 register type VECTYPE and index type INDEX_TYPE and SCALE.
39215 Return NULL_TREE if it is not available. */
39218 ix86_vectorize_builtin_scatter (const_tree vectype,
39219 const_tree index_type, int scale)
39222 enum ix86_builtins code;
39224 if (!TARGET_AVX512F)
39227 if ((TREE_CODE (index_type) != INTEGER_TYPE
39228 && !POINTER_TYPE_P (index_type))
39229 || (TYPE_MODE (index_type) != SImode
39230 && TYPE_MODE (index_type) != DImode))
39233 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
39236 /* v*scatter* insn sign extends index to pointer mode. */
39237 if (TYPE_PRECISION (index_type) < POINTER_SIZE
39238 && TYPE_UNSIGNED (index_type))
39241 /* Scale can be 1, 2, 4 or 8. */
39244 || (scale & (scale - 1)) != 0)
39247 si = TYPE_MODE (index_type) == SImode;
39248 switch (TYPE_MODE (vectype))
39251 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
39254 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
39257 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
39260 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
39263 if (TARGET_AVX512VL)
39264 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
39269 if (TARGET_AVX512VL)
39270 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
39275 if (TARGET_AVX512VL)
39276 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
39281 if (TARGET_AVX512VL)
39282 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
39287 if (TARGET_AVX512VL)
39288 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
39293 if (TARGET_AVX512VL)
39294 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
39299 if (TARGET_AVX512VL)
39300 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
39305 if (TARGET_AVX512VL)
39306 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
39314 return ix86_builtins[code];
}

/* Return true if it is safe to use the rsqrt optabs to optimize
   1.0/sqrt.  */

static bool
use_rsqrt_p ()
{
  return (TARGET_SSE_MATH
          && flag_finite_math_only
          && !flag_trapping_math
          && flag_unsafe_math_optimizations);
}
/* Return the decl of a target-specific builtin that implements the
   reciprocal of the function FNDECL, or NULL_TREE if not
   available.  */

static tree
39333 ix86_builtin_reciprocal (tree fndecl)
39335 switch (DECL_FUNCTION_CODE (fndecl))
39337 /* Vectorized version of sqrt to rsqrt conversion. */
39338 case IX86_BUILTIN_SQRTPS_NR:
39339 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
39341 case IX86_BUILTIN_SQRTPS_NR256:
    return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);

  default:
    return NULL_TREE;
  }
}
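/* Note on the rsqrt replacement above: the hardware RSQRTPS estimate
   only provides roughly 12 bits of precision, so the _NR ("Newton-
   Raphson") variants of these builtins refine the estimate,
   conceptually x1 = x0 * (1.5 - 0.5 * a * x0 * x0).  A scalar sketch
   of that refinement step (illustrative only, not the GCC
   expander):  */
#if 0
static float
rsqrt_refine (float a, float x0)
{
  /* One Newton-Raphson iteration for 1/sqrt(a), given estimate x0.  */
  return x0 * (1.5f - 0.5f * a * x0 * x0);
}
#endif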
39349 /* Helper for avx_vpermilps256_operand et al. This is also used by
39350 the expansion functions to turn the parallel back into a mask.
39351 The return value is 0 for no match and the imm8+1 for a match. */
39354 avx_vpermilp_parallel (rtx par, machine_mode mode)
39356 unsigned i, nelt = GET_MODE_NUNITS (mode);
39358 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
39360 if (XVECLEN (par, 0) != (int) nelt)
39363 /* Validate that all of the elements are constants, and not totally
39364 out of range. Copy the data into an integral array to make the
39365 subsequent checks easier. */
39366 for (i = 0; i < nelt; ++i)
39368 rtx er = XVECEXP (par, 0, i);
39369 unsigned HOST_WIDE_INT ei;
39371 if (!CONST_INT_P (er))
/* In the 512-bit DFmode case, we can only move elements within
   a 128-bit lane.  First fill the second part of the mask,
   then fallthru.  */
39385 for (i = 4; i < 6; ++i)
39387 if (ipar[i] < 4 || ipar[i] >= 6)
39389 mask |= (ipar[i] - 4) << i;
39391 for (i = 6; i < 8; ++i)
39395 mask |= (ipar[i] - 6) << i;
/* In the 256-bit DFmode case, we can only move elements within
   a 128-bit lane.  */
39402 for (i = 0; i < 2; ++i)
39406 mask |= ipar[i] << i;
39408 for (i = 2; i < 4; ++i)
39412 mask |= (ipar[i] - 2) << i;
/* In the 512-bit SFmode case, the permutation in the upper 256 bits
   must mirror the permutation in the lower 256 bits.  */
39419 for (i = 0; i < 8; ++i)
39420 if (ipar[i] + 8 != ipar[i + 8])
/* In the 256-bit SFmode case, we have full freedom of movement
   within the low 128-bit lane, but the high 128-bit lane must
   mirror the exact same pattern.  */
39428 for (i = 0; i < 4; ++i)
39429 if (ipar[i] + 4 != ipar[i + 4])
/* In the 128-bit case, we have full freedom in the placement of
   the elements from the source operand.  */
39438 for (i = 0; i < nelt; ++i)
39439 mask |= ipar[i] << (i * (nelt / 2));
    break;

  default:
    gcc_unreachable ();
  }

/* Make sure success has a non-zero value by adding one.  */
return mask + 1;
}
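/* A worked example of the V4DF encoding above: for
   (parallel [1 0 3 2]) the selectors are 1,0 in the low 128-bit lane
   and 3,2 in the high lane, so the loops accumulate
   mask = (1 << 0) | (0 << 1) | ((3 - 2) << 2) | ((2 - 2) << 3) = 0x5,
   and the function returns 0x6, i.e. imm8 + 1.  */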
39450 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
39451 the expansion functions to turn the parallel back into a mask.
39452 The return value is 0 for no match and the imm8+1 for a match. */
39455 avx_vperm2f128_parallel (rtx par, machine_mode mode)
39457 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
39459 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
39461 if (XVECLEN (par, 0) != (int) nelt)
39464 /* Validate that all of the elements are constants, and not totally
39465 out of range. Copy the data into an integral array to make the
39466 subsequent checks easier. */
39467 for (i = 0; i < nelt; ++i)
39469 rtx er = XVECEXP (par, 0, i);
39470 unsigned HOST_WIDE_INT ei;
39472 if (!CONST_INT_P (er))
39475 if (ei >= 2 * nelt)
/* Validate that the halves of the permute are indeed halves, i.e.
   that each half selects a run of consecutive elements.  */
39481 for (i = 0; i < nelt2 - 1; ++i)
39482 if (ipar[i] + 1 != ipar[i + 1])
39484 for (i = nelt2; i < nelt - 1; ++i)
39485 if (ipar[i] + 1 != ipar[i + 1])
39488 /* Reconstruct the mask. */
39489 for (i = 0; i < 2; ++i)
  {
    unsigned e = ipar[i * nelt2];
    if (e % nelt2)
      return 0;
    e /= nelt2;
    mask |= e << (i * 4);
  }

/* Make sure success has a non-zero value by adding one.  */
return mask + 1;
}
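/* A worked example of the encoding above: for V4DF and
   (parallel [0 1 6 7]), the low half selects 128-bit half 0 of the
   two-operand pair and the high half selects half 3 (the upper half
   of the second operand), so mask = (0 << 0) | (3 << 4) = 0x30 and
   the function returns 0x31, i.e. imm8 + 1.  */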
39502 /* Return a register priority for hard reg REGNO. */
39504 ix86_register_priority (int hard_regno)
/* ebp and r13 as a base register always want a displacement, and r12
   as a base register always wants an index.  So discourage their
   use in an address.  */
39509 if (hard_regno == R12_REG || hard_regno == R13_REG)
39511 if (hard_regno == BP_REG)
/* New x86-64 int registers result in bigger code size.  Discourage
   their use.  */
39515 if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
/* New x86-64 SSE registers result in bigger code size.  Discourage
   their use.  */
39519 if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
39521 if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG))
39523 /* Usage of AX register results in smaller code. Prefer it. */
39524 if (hard_regno == AX_REG)
39529 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
39531 Put float CONST_DOUBLE in the constant pool instead of fp regs.
39532 QImode must go into class Q_REGS.
39533 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
39534 movdf to do mem-to-mem moves through integer regs. */
39537 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
39539 machine_mode mode = GET_MODE (x);
39541 /* We're only allowed to return a subclass of CLASS. Many of the
39542 following checks fail for NO_REGS, so eliminate that early. */
39543 if (regclass == NO_REGS)
39546 /* All classes can load zeros. */
39547 if (x == CONST0_RTX (mode))
39550 /* Force constants into memory if we are loading a (nonzero) constant into
39551 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
39552 instructions to load from a constant. */
39554 && (MAYBE_MMX_CLASS_P (regclass)
39555 || MAYBE_SSE_CLASS_P (regclass)
39556 || MAYBE_MASK_CLASS_P (regclass)))
39559 /* Floating-point constants need more complex checks. */
39560 if (CONST_DOUBLE_P (x))
39562 /* General regs can load everything. */
39563 if (INTEGER_CLASS_P (regclass))
39566 /* Floats can load 0 and 1 plus some others. Note that we eliminated
39567 zero above. We only want to wind up preferring 80387 registers if
39568 we plan on doing computation with them. */
39569 if (IS_STACK_MODE (mode)
39570 && standard_80387_constant_p (x) > 0)
39572 /* Limit class to FP regs. */
39573 if (FLOAT_CLASS_P (regclass))
39580 /* Prefer SSE regs only, if we can use them for math. */
39581 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39582 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
/* Generally when we see PLUS here, it's the function invariant
   (plus soft-fp const_int), which can only be computed into general
   regs.  */
39587 if (GET_CODE (x) == PLUS)
39588 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
39590 /* QImode constants are easy to load, but non-constant QImode data
39591 must go into Q_REGS. */
39592 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
39594 if (Q_CLASS_P (regclass))
39596 else if (reg_class_subset_p (Q_REGS, regclass))
39605 /* Discourage putting floating-point values in SSE registers unless
39606 SSE math is being used, and likewise for the 387 registers. */
39608 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
39610 machine_mode mode = GET_MODE (x);
/* Restrict the output reload class to the register bank that we are doing
   math on.  If we would like not to return a subset of CLASS, reject this
   alternative: if reload cannot do this, it will still use its choice.  */
39616 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39617 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
39619 if (IS_STACK_MODE (mode))
39620 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
39626 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
39627 machine_mode mode, secondary_reload_info *sri)
39629 /* Double-word spills from general registers to non-offsettable memory
39630 references (zero-extended addresses) require special handling. */
39633 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
39634 && INTEGER_CLASS_P (rclass)
39635 && !offsettable_memref_p (x))
39638 ? CODE_FOR_reload_noff_load
39639 : CODE_FOR_reload_noff_store);
39640 /* Add the cost of moving address to a temporary. */
39641 sri->extra_cost = 1;
/* QImode spills from non-QI registers require an
   intermediate register on 32-bit targets.  */
39649 && ((!TARGET_64BIT && !in_p
39650 && INTEGER_CLASS_P (rclass)
39651 && MAYBE_NON_Q_CLASS_P (rclass))
39652 || (!TARGET_AVX512DQ
39653 && MAYBE_MASK_CLASS_P (rclass))))
39655 int regno = true_regnum (x);
/* Return Q_REGS if the operand is in memory.  */
if (regno == -1)
  return Q_REGS;

return NO_REGS;
}
39664 /* This condition handles corner case where an expression involving
39665 pointers gets vectorized. We're trying to use the address of a
39666 stack slot as a vector initializer.
39668 (set (reg:V2DI 74 [ vect_cst_.2 ])
39669 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
39671 Eventually frame gets turned into sp+offset like this:
39673 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39674 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
39675 (const_int 392 [0x188]))))
39677 That later gets turned into:
39679 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39680 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
39681 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
39683 We'll have the following reload recorded:
39685 Reload 0: reload_in (DI) =
39686 (plus:DI (reg/f:DI 7 sp)
39687 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
39688 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39689 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
39690 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
39691 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39692 reload_reg_rtx: (reg:V2DI 22 xmm1)
39694 Which isn't going to work since SSE instructions can't handle scalar
39695 additions. Returning GENERAL_REGS forces the addition into integer
39696 register and reload can handle subsequent reloads without problems. */
39698 if (in_p && GET_CODE (x) == PLUS
39699 && SSE_CLASS_P (rclass)
39700 && SCALAR_INT_MODE_P (mode))
39701 return GENERAL_REGS;
39706 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
39709 ix86_class_likely_spilled_p (reg_class_t rclass)
39720 case SSE_FIRST_REG:
39722 case FP_SECOND_REG:
39732 /* If we are copying between registers from different register sets
39733 (e.g. FP and integer), we may need a memory location.
39735 The function can't work reliably when one of the CLASSES is a class
39736 containing registers from multiple sets. We avoid this by never combining
39737 different sets in a single alternative in the machine description.
39738 Ensure that this constraint holds to avoid unexpected surprises.
39740 When STRICT is false, we are being called from REGISTER_MOVE_COST,
39741 so do not enforce these sanity checks.
39743 To optimize register_move_cost performance, define inline variant. */
39746 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
39747 reg_class_t class2, int strict)
39749 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
39752 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
39753 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
39754 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
39755 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
39756 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
39757 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
39758 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
39759 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
39761 gcc_assert (!strict || lra_in_progress);
39765 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
39768 /* Between mask and general, we have moves no larger than word size. */
39769 if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
39770 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
39773 /* ??? This is a lie. We do have moves between mmx/general, and for
39774 mmx/sse2. But by saying we need secondary memory we discourage the
39775 register allocator from using the mmx registers unless needed. */
39776 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
39779 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
39781 /* SSE1 doesn't have any direct moves from other classes. */
39785 /* If the target says that inter-unit moves are more expensive
39786 than moving through memory, then don't generate them. */
39787 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
39788 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
39791 /* Between SSE and general, we have moves no larger than word size. */
39792 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39799 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
39802 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
39803 reg_class_t class2)
39805 return inline_secondary_memory_needed (mode, class1, class2, true);
39808 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
39810 get_secondary_mem widens integral modes to BITS_PER_WORD.
There is no need to emit a full 64-bit move on 64-bit targets
for integral modes that can be moved using a 32-bit move.  */
39814 static machine_mode
39815 ix86_secondary_memory_needed_mode (machine_mode mode)
39817 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();

return mode;
}
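/* For example, a QImode or HImode value spilled between register
   sets is reloaded through a 32-bit (SImode) stack slot, since
   32-bit integer moves exist everywhere; SImode and wider modes are
   returned unchanged.  */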
39822 /* Implement the TARGET_CLASS_MAX_NREGS hook.
39824 On the 80386, this is the size of MODE in words,
39825 except in the FP regs, where a single reg is always enough. */
39827 static unsigned char
39828 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
39830 if (MAYBE_INTEGER_CLASS_P (rclass))
39832 if (mode == XFmode)
39833 return (TARGET_64BIT ? 2 : 3);
39834 else if (mode == XCmode)
39835 return (TARGET_64BIT ? 4 : 6);
39837 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
39841 if (COMPLEX_MODE_P (mode))
39848 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
39851 ix86_can_change_mode_class (machine_mode from, machine_mode to,
39852 reg_class_t regclass)
39857 /* x87 registers can't do subreg at all, as all values are reformatted
39858 to extended precision. */
39859 if (MAYBE_FLOAT_CLASS_P (regclass))
39862 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
39864 /* Vector registers do not support QI or HImode loads. If we don't
39865 disallow a change to these modes, reload will assume it's ok to
39866 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
39867 the vec_dupv4hi pattern. */
39868 if (GET_MODE_SIZE (from) < 4)
/* Return the index of MODE in the SSE load/store tables.  */
39878 sse_store_index (machine_mode mode)
39880 switch (GET_MODE_SIZE (mode))
39897 /* Return the cost of moving data of mode M between a
39898 register and memory. A value of 2 is the default; this cost is
39899 relative to those in `REGISTER_MOVE_COST'.
39901 This function is used extensively by register_move_cost that is used to
39902 build tables at startup. Make it inline in this case.
39903 When IN is 2, return maximum of in and out move cost.
   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in
   non-Q_REGS classes.  */

static inline int
39913 inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
39916 if (FLOAT_CLASS_P (regclass))
39934 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
39935 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
39937 if (SSE_CLASS_P (regclass))
39939 int index = sse_store_index (mode);
39943 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
39944 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
39946 if (MMX_CLASS_P (regclass))
39949 switch (GET_MODE_SIZE (mode))
39961 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
39962 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
39964 switch (GET_MODE_SIZE (mode))
39967 if (Q_CLASS_P (regclass) || TARGET_64BIT)
39970 return ix86_cost->int_store[0];
39971 if (TARGET_PARTIAL_REG_DEPENDENCY
39972 && optimize_function_for_speed_p (cfun))
39973 cost = ix86_cost->movzbl_load;
39975 cost = ix86_cost->int_load[0];
39977 return MAX (cost, ix86_cost->int_store[0]);
39983 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
39985 return ix86_cost->movzbl_load;
39987 return ix86_cost->int_store[0] + 4;
39992 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
39993 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
39996 cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
39998 cost = ix86_cost->int_load[2];
40000 cost = ix86_cost->int_store[2];
40001 /* Multiply with the number of GPR moves needed. */
40002 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
40007 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
40009 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
40013 /* Return the cost of moving data from a register in class CLASS1 to
40014 one in class CLASS2.
40016 It is not required that the cost always equal 2 when FROM is the same as TO;
40017 on some machines it is expensive to move between registers if they are not
40018 general registers. */
40021 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
40022 reg_class_t class2_i)
40024 enum reg_class class1 = (enum reg_class) class1_i;
40025 enum reg_class class2 = (enum reg_class) class2_i;
40027 /* In case we require secondary memory, compute cost of the store followed
40028 by load. In order to avoid bad register allocation choices, we need
40029 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
40031 if (inline_secondary_memory_needed (mode, class1, class2, false))
40035 cost += inline_memory_move_cost (mode, class1, 2);
40036 cost += inline_memory_move_cost (mode, class2, 2);
/* In case of copying from general_purpose_register we may emit
   multiple stores followed by a single load, causing a memory size
   mismatch stall.  Count this as an arbitrarily high cost of 20.  */
40041 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
40042 && TARGET_MEMORY_MISMATCH_STALL
40043 && targetm.class_max_nregs (class1, mode)
40044 > targetm.class_max_nregs (class2, mode))
40047 /* In the case of FP/MMX moves, the registers actually overlap, and we
40048 have to switch modes in order to treat them differently. */
40049 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
40050 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
40056 /* Moves between SSE/MMX and integer unit are expensive. */
40057 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
40058 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
/* ??? By keeping the returned value relatively high, we limit the
   number of moves between integer and MMX/SSE registers for all
   targets.  Additionally, a high value prevents problems with
   x86_modes_tieable_p (), where integer modes in MMX/SSE registers
   are not tieable because of missing QImode and HImode moves to,
   from or between MMX/SSE registers.  */
40066 return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2)
40067 ? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer);
40069 if (MAYBE_FLOAT_CLASS_P (class1))
40070 return ix86_cost->fp_move;
40071 if (MAYBE_SSE_CLASS_P (class1))
40073 if (GET_MODE_BITSIZE (mode) <= 128)
40074 return ix86_cost->xmm_move;
40075 if (GET_MODE_BITSIZE (mode) <= 256)
40076 return ix86_cost->ymm_move;
40077 return ix86_cost->zmm_move;
40079 if (MAYBE_MMX_CLASS_P (class1))
40080 return ix86_cost->mmx_move;
40084 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
40085 words of a value of mode MODE but can be less for certain modes in
40086 special long registers.
   Actually there are no two-word move instructions for consecutive
   registers, and only registers 0-3 may have mov byte instructions
   applied to them.  */
40092 static unsigned int
40093 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
40095 if (GENERAL_REGNO_P (regno))
40097 if (mode == XFmode)
40098 return TARGET_64BIT ? 2 : 3;
40099 if (mode == XCmode)
40100 return TARGET_64BIT ? 4 : 6;
40101 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
40103 if (COMPLEX_MODE_P (mode))
40105 if (mode == V64SFmode || mode == V64SImode)
40110 /* Implement TARGET_HARD_REGNO_MODE_OK. */
40113 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
/* Flags, and only flags, can hold CCmode values.  */
40116 if (CC_REGNO_P (regno))
40117 return GET_MODE_CLASS (mode) == MODE_CC;
40118 if (GET_MODE_CLASS (mode) == MODE_CC
40119 || GET_MODE_CLASS (mode) == MODE_RANDOM
40120 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
40122 if (STACK_REGNO_P (regno))
40123 return VALID_FP_MODE_P (mode);
40124 if (MASK_REGNO_P (regno))
40125 return (VALID_MASK_REG_MODE (mode)
40126 || (TARGET_AVX512BW
40127 && VALID_MASK_AVX512BW_MODE (mode)));
40128 if (SSE_REGNO_P (regno))
40130 /* We implement the move patterns for all vector modes into and
40131 out of SSE registers, even when no operation instructions
/* For AVX-512 we allow, regardless of REGNO:
   - any 512-bit wide vector mode
   - any scalar mode.  */
40140 || VALID_AVX512F_REG_MODE (mode)
40141 || VALID_AVX512F_SCALAR_MODE (mode)))
40144 /* For AVX-5124FMAPS or AVX-5124VNNIW
40145 allow V64SF and V64SI modes for special regnos. */
40146 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
40147 && (mode == V64SFmode || mode == V64SImode)
40148 && MOD4_SSE_REGNO_P (regno))
40151 /* TODO check for QI/HI scalars. */
/* AVX512VL allows SSE regs 16+ for 128/256-bit modes.  */
40153 if (TARGET_AVX512VL
40156 || VALID_AVX256_REG_MODE (mode)
40157 || VALID_AVX512VL_128_REG_MODE (mode)))
40160 /* xmm16-xmm31 are only available for AVX-512. */
40161 if (EXT_REX_SSE_REGNO_P (regno))
40164 /* OImode and AVX modes are available only when AVX is enabled. */
40165 return ((TARGET_AVX
40166 && VALID_AVX256_REG_OR_OI_MODE (mode))
40167 || VALID_SSE_REG_MODE (mode)
40168 || VALID_SSE2_REG_MODE (mode)
40169 || VALID_MMX_REG_MODE (mode)
40170 || VALID_MMX_REG_MODE_3DNOW (mode));
40172 if (MMX_REGNO_P (regno))
40174 /* We implement the move patterns for 3DNOW modes even in MMX mode,
40175 so if the register is available at all, then we can move data of
40176 the given mode into or out of it. */
40177 return (VALID_MMX_REG_MODE (mode)
40178 || VALID_MMX_REG_MODE_3DNOW (mode));
40181 if (mode == QImode)
40183 /* Take care for QImode values - they can be in non-QI regs,
40184 but then they do cause partial register stalls. */
if (ANY_QI_REGNO_P (regno))
  return true;
if (!TARGET_PARTIAL_REG_STALL)
  return true;
/* LRA checks if the hard register is OK for the given mode.
   QImode values can live in non-QI regs, so we allow all
   registers here.  */
if (lra_in_progress)
  return true;
return !can_create_pseudo_p ();
}
/* We handle both integers and floats in the general purpose
   registers.  */
40197 else if (VALID_INT_MODE_P (mode))
40199 else if (VALID_FP_MODE_P (mode))
40201 else if (VALID_DFP_MODE_P (mode))
40203 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
40204 on to use that value in smaller contexts, this can easily force a
40205 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
40206 supporting DImode, allow it. */
40207 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
40213 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
40214 saves SSE registers across calls is Win64 (thus no need to check the
40215 current ABI here), and with AVX enabled Win64 only guarantees that
40216 the low 16 bytes are saved. */
40219 ix86_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED,
40220 unsigned int regno, machine_mode mode)
return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
}
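/* For example, a 256-bit V4DF value live in xmm6 across a call is
   only guaranteed to survive in its low 16 bytes under Win64, so the
   hook reports the register as part-clobbered for that mode, whereas
   a DFmode value in the same register is fully preserved.  */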
40225 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
40226 tieable integer mode. */
40229 ix86_tieable_integer_mode_p (machine_mode mode)
40238 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
40241 return TARGET_64BIT;
40248 /* Implement TARGET_MODES_TIEABLE_P.
40250 Return true if MODE1 is accessible in a register that can hold MODE2
40251 without copying. That is, all register classes that can hold MODE2
40252 can also hold MODE1. */
40255 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
40257 if (mode1 == mode2)
40260 if (ix86_tieable_integer_mode_p (mode1)
40261 && ix86_tieable_integer_mode_p (mode2))
40264 /* MODE2 being XFmode implies fp stack or general regs, which means we
40265 can tie any smaller floating point modes to it. Note that we do not
40266 tie this with TFmode. */
40267 if (mode2 == XFmode)
40268 return mode1 == SFmode || mode1 == DFmode;
40270 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
40271 that we can tie it with SFmode. */
40272 if (mode2 == DFmode)
40273 return mode1 == SFmode;
40275 /* If MODE2 is only appropriate for an SSE register, then tie with
40276 any other mode acceptable to SSE registers. */
40277 if (GET_MODE_SIZE (mode2) == 64
40278 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
40279 return (GET_MODE_SIZE (mode1) == 64
40280 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
40281 if (GET_MODE_SIZE (mode2) == 32
40282 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
40283 return (GET_MODE_SIZE (mode1) == 32
40284 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
40285 if (GET_MODE_SIZE (mode2) == 16
40286 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
40287 return (GET_MODE_SIZE (mode1) == 16
40288 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
40290 /* If MODE2 is appropriate for an MMX register, then tie
40291 with any other mode acceptable to MMX registers. */
40292 if (GET_MODE_SIZE (mode2) == 8
40293 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
40294 return (GET_MODE_SIZE (mode1) == 8
        && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

return false;
}
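/* For instance, per the integer cases earlier: SImode and HImode
   always tie; QImode ties on 64-bit targets or when partial register
   stalls are not modeled; DImode ties only on TARGET_64BIT.  Hence a
   (subreg:HI (reg:SI ...)) never forces a copy.  */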
40300 /* Return the cost of moving between two registers of mode MODE. */
40303 ix86_set_reg_reg_cost (machine_mode mode)
40305 unsigned int units = UNITS_PER_WORD;
40307 switch (GET_MODE_CLASS (mode))
40313 units = GET_MODE_SIZE (CCmode);
40317 if ((TARGET_SSE && mode == TFmode)
40318 || (TARGET_80387 && mode == XFmode)
40319 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
40320 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
40321 units = GET_MODE_SIZE (mode);
40324 case MODE_COMPLEX_FLOAT:
40325 if ((TARGET_SSE && mode == TCmode)
40326 || (TARGET_80387 && mode == XCmode)
40327 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
40328 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
40329 units = GET_MODE_SIZE (mode);
40332 case MODE_VECTOR_INT:
40333 case MODE_VECTOR_FLOAT:
40334 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
40335 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
40336 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
40337 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
40338 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
40339 units = GET_MODE_SIZE (mode);
40342 /* Return the cost of moving between two registers of mode MODE,
40343 assuming that the move will be in pieces of at most UNITS bytes. */
40344 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
/* Return the cost of a vector operation in MODE, given that the
   scalar version has cost COST.  */

static int
40351 ix86_vec_cost (machine_mode mode, int cost)
40353 if (!VECTOR_MODE_P (mode))
40356 if (GET_MODE_BITSIZE (mode) == 128
    && TARGET_SSE_SPLIT_REGS)
  return cost * 2;
40359 if (GET_MODE_BITSIZE (mode) > 128
40360 && TARGET_AVX128_OPTIMAL)
  return cost * GET_MODE_BITSIZE (mode) / 128;

return cost;
}
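/* Example: with TARGET_AVX128_OPTIMAL tuning (cores that internally
   split 256-bit operations), a V4DF operation whose scalar cost is C
   is charged C * 256 / 128 = 2 * C, matching the two 128-bit halves
   that will actually execute; TARGET_SSE_SPLIT_REGS similarly doubles
   the cost of 128-bit operations.  */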
40365 /* Return cost of multiplication in MODE. */
40368 ix86_multiplication_cost (const struct processor_costs *cost,
40369 enum machine_mode mode)
40371 machine_mode inner_mode = mode;
40372 if (VECTOR_MODE_P (mode))
40373 inner_mode = GET_MODE_INNER (mode);
40375 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40376 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
40377 else if (X87_FLOAT_MODE_P (mode))
40379 else if (FLOAT_MODE_P (mode))
40380 return ix86_vec_cost (mode,
40381 inner_mode == DFmode ? cost->mulsd : cost->mulss);
40382 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
40384 /* vpmullq is used in this case. No emulation is needed. */
40385 if (TARGET_AVX512DQ)
40386 return ix86_vec_cost (mode, cost->mulss);
40388 /* V*QImode is emulated with 7-13 insns. */
40389 if (mode == V16QImode || mode == V32QImode)
40392 if (TARGET_XOP && mode == V16QImode)
40394 else if (TARGET_SSSE3)
40396 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra);
40398 /* V*DImode is emulated with 5-8 insns. */
40399 else if (mode == V2DImode || mode == V4DImode)
40401 if (TARGET_XOP && mode == V2DImode)
40402 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3);
40404 return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
40406 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
40407 insns, including two PMULUDQ. */
40408 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
40409 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
40411 return ix86_vec_cost (mode, cost->mulss);
40414 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
/* Return cost of division in MODE.  */
40420 ix86_division_cost (const struct processor_costs *cost,
40421 enum machine_mode mode)
40423 machine_mode inner_mode = mode;
40424 if (VECTOR_MODE_P (mode))
40425 inner_mode = GET_MODE_INNER (mode);
40427 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40428 return inner_mode == DFmode ? cost->divsd : cost->divss;
40429 else if (X87_FLOAT_MODE_P (mode))
40431 else if (FLOAT_MODE_P (mode))
40432 return ix86_vec_cost (mode,
40433 inner_mode == DFmode ? cost->divsd : cost->divss);
40435 return cost->divide[MODE_INDEX (mode)];
/* Return cost of shift in MODE.
   If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
   AND_IN_OP1 specifies whether op1 is the result of an AND, and
   SHIFT_AND_TRUNCATE whether op1 is the result of a subreg.

   SKIP_OP0/1 is set to true if the cost of OP0/1 should be ignored.  */
40446 ix86_shift_rotate_cost (const struct processor_costs *cost,
40447 enum machine_mode mode, bool constant_op1,
40448 HOST_WIDE_INT op1_val,
40451 bool shift_and_truncate,
40452 bool *skip_op0, bool *skip_op1)
40455 *skip_op0 = *skip_op1 = false;
40456 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
40458 /* V*QImode is emulated with 1-11 insns. */
40459 if (mode == V16QImode || mode == V32QImode)
40462 if (TARGET_XOP && mode == V16QImode)
/* For XOP we use vpshab, which requires a broadcast of the
   value to the variable shift insn.  For constants this
   means a V16Q const in mem; even when we can perform the
   shift with one insn, set the cost to prefer paddb.  */
40472 return ix86_vec_cost (mode,
40477 (GET_MODE_UNIT_SIZE (mode))));
40481 else if (TARGET_SSSE3)
40483 return ix86_vec_cost (mode, cost->sse_op * count);
40486 return ix86_vec_cost (mode, cost->sse_op);
40488 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
40493 return cost->shift_const + COSTS_N_INSNS (2);
40495 return cost->shift_const * 2;
40500 return cost->shift_var * 2;
40502 return cost->shift_var * 6 + COSTS_N_INSNS (2);
40508 return cost->shift_const;
40509 else if (shift_and_truncate)
40512 *skip_op0 = *skip_op1 = true;
40513 /* Return the cost after shift-and truncation. */
40514 return cost->shift_var;
40517 return cost->shift_var;
40519 return cost->shift_const;
40522 /* Compute a (partial) cost for rtx X. Return true if the complete
40523 cost has been computed, and false if subexpressions should be
40524 scanned. In either case, *TOTAL contains the cost result. */
40527 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
40528 int *total, bool speed)
40531 enum rtx_code code = GET_CODE (x);
40532 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
40533 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
40539 if (register_operand (SET_DEST (x), VOIDmode)
40540 && register_operand (SET_SRC (x), VOIDmode))
40542 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
40546 if (register_operand (SET_SRC (x), VOIDmode))
40547 /* Avoid potentially incorrect high cost from rtx_costs
40548 for non-tieable SUBREGs. */
40552 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
40554 if (CONSTANT_P (SET_SRC (x)))
40555 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
40556 a small value, possibly zero for cheap constants. */
40557 src_cost += COSTS_N_INSNS (1);
40560 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
40567 if (x86_64_immediate_operand (x, VOIDmode))
40574 if (IS_STACK_MODE (mode))
40575 switch (standard_80387_constant_p (x))
40583 default: /* Other constants */
40590 switch (standard_sse_constant_p (x, mode))
40594 case 1: /* 0: xor eliminates false dependency */
40597 default: /* -1: cmp contains false dependency */
40603 case CONST_WIDE_INT:
40604 /* Fall back to (MEM (SYMBOL_REF)), since that's where
40605 it'll probably end up. Add a penalty for size. */
40606 *total = (COSTS_N_INSNS (1)
40607 + (!TARGET_64BIT && flag_pic)
40608 + (GET_MODE_SIZE (mode) <= 4
40609 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
/* The zero extension is often completely free on x86_64, so make
   it as cheap as possible.  */
40615 if (TARGET_64BIT && mode == DImode
40616 && GET_MODE (XEXP (x, 0)) == SImode)
40618 else if (TARGET_ZERO_EXTEND_WITH_AND)
40619 *total = cost->add;
40621 *total = cost->movzx;
40625 *total = cost->movsx;
40629 if (SCALAR_INT_MODE_P (mode)
40630 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
40631 && CONST_INT_P (XEXP (x, 1)))
40633 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
40636 *total = cost->add;
40639 if ((value == 2 || value == 3)
40640 && cost->lea <= cost->shift_const)
40642 *total = cost->lea;
40652 bool skip_op0, skip_op1;
40653 *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
40654 CONST_INT_P (XEXP (x, 1))
40655 ? INTVAL (XEXP (x, 1)) : -1,
40657 GET_CODE (XEXP (x, 1)) == AND,
40658 SUBREG_P (XEXP (x, 1))
40659 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
40660 &skip_op0, &skip_op1);
40661 if (skip_op0 || skip_op1)
40664 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
40666 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
40675 gcc_assert (FLOAT_MODE_P (mode));
40676 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
40678 *total = ix86_vec_cost (mode,
40679 GET_MODE_INNER (mode) == SFmode
40680 ? cost->fmass : cost->fmasd);
40681 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
40683 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
40685 if (GET_CODE (sub) == NEG)
40686 sub = XEXP (sub, 0);
40687 *total += rtx_cost (sub, mode, FMA, 0, speed);
40690 if (GET_CODE (sub) == NEG)
40691 sub = XEXP (sub, 0);
40692 *total += rtx_cost (sub, mode, FMA, 2, speed);
40697 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
40699 rtx op0 = XEXP (x, 0);
40700 rtx op1 = XEXP (x, 1);
40702 if (CONST_INT_P (XEXP (x, 1)))
40704 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
40705 for (nbits = 0; value != 0; value &= value - 1)
40709 /* This is arbitrary. */
40712 /* Compute costs correctly for widening multiplication. */
40713 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
40714 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
40715 == GET_MODE_SIZE (mode))
40717 int is_mulwiden = 0;
40718 machine_mode inner_mode = GET_MODE (op0);
40720 if (GET_CODE (op0) == GET_CODE (op1))
40721 is_mulwiden = 1, op1 = XEXP (op1, 0);
40722 else if (CONST_INT_P (op1))
40724 if (GET_CODE (op0) == SIGN_EXTEND)
40725 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
40728 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
40732 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
40735 *total = (cost->mult_init[MODE_INDEX (mode)]
40736 + nbits * cost->mult_bit
40737 + rtx_cost (op0, mode, outer_code, opno, speed)
40738 + rtx_cost (op1, mode, outer_code, opno, speed));
40742 *total = ix86_multiplication_cost (cost, mode);
40749 *total = ix86_division_cost (cost, mode);
40753 if (GET_MODE_CLASS (mode) == MODE_INT
40754 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
40756 if (GET_CODE (XEXP (x, 0)) == PLUS
40757 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
40758 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
40759 && CONSTANT_P (XEXP (x, 1)))
40761 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
40762 if (val == 2 || val == 4 || val == 8)
40764 *total = cost->lea;
40765 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
40766 outer_code, opno, speed);
40767 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
40768 outer_code, opno, speed);
40769 *total += rtx_cost (XEXP (x, 1), mode,
40770 outer_code, opno, speed);
40774 else if (GET_CODE (XEXP (x, 0)) == MULT
40775 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
40777 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
40778 if (val == 2 || val == 4 || val == 8)
40780 *total = cost->lea;
40781 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
40782 outer_code, opno, speed);
40783 *total += rtx_cost (XEXP (x, 1), mode,
40784 outer_code, opno, speed);
40788 else if (GET_CODE (XEXP (x, 0)) == PLUS)
40790 /* Add with carry, ignore the cost of adding a carry flag. */
40791 if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
40792 *total = cost->add;
40795 *total = cost->lea;
40796 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
40797 outer_code, opno, speed);
40800 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
40801 outer_code, opno, speed);
40802 *total += rtx_cost (XEXP (x, 1), mode,
40803 outer_code, opno, speed);
40810 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
40811 if (GET_MODE_CLASS (mode) == MODE_INT
40812 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
40813 && GET_CODE (XEXP (x, 0)) == MINUS
40814 && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
40816 *total = cost->add;
40817 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
40818 outer_code, opno, speed);
40819 *total += rtx_cost (XEXP (x, 1), mode,
40820 outer_code, opno, speed);
40824 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40826 *total = cost->addss;
40829 else if (X87_FLOAT_MODE_P (mode))
40831 *total = cost->fadd;
40834 else if (FLOAT_MODE_P (mode))
40836 *total = ix86_vec_cost (mode, cost->addss);
40844 if (GET_MODE_CLASS (mode) == MODE_INT
40845 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
40847 *total = (cost->add * 2
40848 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
40849 << (GET_MODE (XEXP (x, 0)) != DImode))
40850 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
40851 << (GET_MODE (XEXP (x, 1)) != DImode)));
40857 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40859 *total = cost->sse_op;
40862 else if (X87_FLOAT_MODE_P (mode))
40864 *total = cost->fchs;
40867 else if (FLOAT_MODE_P (mode))
40869 *total = ix86_vec_cost (mode, cost->sse_op);
40875 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
40876 *total = ix86_vec_cost (mode, cost->sse_op);
40877 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
40878 *total = cost->add * 2;
40880 *total = cost->add;
40884 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
40885 && XEXP (XEXP (x, 0), 1) == const1_rtx
40886 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
40887 && XEXP (x, 1) == const0_rtx)
40889 /* This kind of construct is implemented using test[bwl].
40890 Treat it as if we had an AND. */
40891 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
40892 *total = (cost->add
40893 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
40895 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
40899 /* The embedded comparison operand is completely free. */
40900 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
40901 && XEXP (x, 1) == const0_rtx)
40907 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
40910 *total = ix86_vec_cost (mode, cost->addss);
40913 case FLOAT_TRUNCATE:
40914 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
40915 *total = cost->fadd;
40917 *total = ix86_vec_cost (mode, cost->addss);
40921 /* SSE requires memory load for the constant operand. It may make
40922 sense to account for this. Of course the constant operand may or
40923 may not be reused. */
40924 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40925 *total = cost->sse_op;
40926 else if (X87_FLOAT_MODE_P (mode))
40927 *total = cost->fabs;
40928 else if (FLOAT_MODE_P (mode))
40929 *total = ix86_vec_cost (mode, cost->sse_op);
40933 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40934 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
40935 else if (X87_FLOAT_MODE_P (mode))
40936 *total = cost->fsqrt;
40937 else if (FLOAT_MODE_P (mode))
40938 *total = ix86_vec_cost (mode,
40939 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
40943 if (XINT (x, 1) == UNSPEC_TP)
40949 case VEC_DUPLICATE:
/* ??? Assume all of these vector manipulation patterns are
   recognizable.  In which case they all pretty much have the
   same cost.  */
40953 *total = cost->sse_op;
40956 mask = XEXP (x, 2);
/* This is a masked instruction; assume the same cost as the
   non-masked variant.  */
40959 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
40960 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
40962 *total = cost->sse_op;
40972 static int current_machopic_label_num;
40974 /* Given a symbol name and its associated stub, write out the
40975 definition of the stub. */
40978 machopic_output_stub (FILE *file, const char *symb, const char *stub)
40980 unsigned int length;
40981 char *binder_name, *symbol_name, lazy_ptr_name[32];
40982 int label = ++current_machopic_label_num;
40984 /* For 64-bit we shouldn't get here. */
40985 gcc_assert (!TARGET_64BIT);
40987 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
40988 symb = targetm.strip_name_encoding (symb);
40990 length = strlen (stub);
40991 binder_name = XALLOCAVEC (char, length + 32);
40992 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
40994 length = strlen (symb);
40995 symbol_name = XALLOCAVEC (char, length + 32);
40996 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
40998 sprintf (lazy_ptr_name, "L%d$lz", label);
41000 if (MACHOPIC_ATT_STUB)
41001 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
41002 else if (MACHOPIC_PURE)
41003 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
41005 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
41007 fprintf (file, "%s:\n", stub);
41008 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
41010 if (MACHOPIC_ATT_STUB)
41012 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
41014 else if (MACHOPIC_PURE)
41017 /* 25-byte PIC stub using "CALL get_pc_thunk". */
41018 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
41019 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
41020 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
41021 label, lazy_ptr_name, label);
41022 fprintf (file, "\tjmp\t*%%ecx\n");
41025 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
41027 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
41028 it needs no stub-binding-helper. */
41029 if (MACHOPIC_ATT_STUB)
41032 fprintf (file, "%s:\n", binder_name);
41036 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
41037 fprintf (file, "\tpushl\t%%ecx\n");
41040 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
41042 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
41044 /* N.B. Keep the correspondence of these
41045 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
41046 old-pic/new-pic/non-pic stubs; altering this will break
41047 compatibility with existing dylibs. */
41050 /* 25-byte PIC stub using "CALL get_pc_thunk". */
41051 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
41054 /* 16-byte -mdynamic-no-pic stub. */
switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
41057 fprintf (file, "%s:\n", lazy_ptr_name);
41058 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
41059 fprintf (file, ASM_LONG "%s\n", binder_name);
41061 #endif /* TARGET_MACHO */
/* Order the registers for the register allocator.  */
41066 x86_order_regs_for_local_alloc (void)
41071 /* First allocate the local general purpose registers. */
41072 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41073 if (GENERAL_REGNO_P (i) && call_used_regs[i])
41074 reg_alloc_order [pos++] = i;
41076 /* Global general purpose registers. */
41077 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41078 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
41079 reg_alloc_order [pos++] = i;
/* x87 registers come first in case we are doing FP math
   using them.  */
41083 if (!TARGET_SSE_MATH)
41084 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
41085 reg_alloc_order [pos++] = i;
41087 /* SSE registers. */
41088 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
41089 reg_alloc_order [pos++] = i;
41090 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
41091 reg_alloc_order [pos++] = i;
41093 /* Extended REX SSE registers. */
41094 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
41095 reg_alloc_order [pos++] = i;
/* Mask registers.  */
41098 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
41099 reg_alloc_order [pos++] = i;
41101 /* x87 registers. */
41102 if (TARGET_SSE_MATH)
41103 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
41104 reg_alloc_order [pos++] = i;
41106 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
41107 reg_alloc_order [pos++] = i;
/* Initialize the rest of the array, as we do not allocate some
   registers at all.  */
41111 while (pos < FIRST_PSEUDO_REGISTER)
41112 reg_alloc_order [pos++] = 0;
41115 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
41116 in struct attribute_spec handler. */
41118 ix86_handle_callee_pop_aggregate_return (tree *node, tree name, tree args, int,
41119 bool *no_add_attrs)
41121 if (TREE_CODE (*node) != FUNCTION_TYPE
41122 && TREE_CODE (*node) != METHOD_TYPE
41123 && TREE_CODE (*node) != FIELD_DECL
41124 && TREE_CODE (*node) != TYPE_DECL)
41126 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41128 *no_add_attrs = true;
41133 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
41135 *no_add_attrs = true;
41138 if (is_attribute_p ("callee_pop_aggregate_return", name))
41142 cst = TREE_VALUE (args);
41143 if (TREE_CODE (cst) != INTEGER_CST)
41145 warning (OPT_Wattributes,
41146 "%qE attribute requires an integer constant argument",
41148 *no_add_attrs = true;
41150 else if (compare_tree_int (cst, 0) != 0
41151 && compare_tree_int (cst, 1) != 0)
41153 warning (OPT_Wattributes,
41154 "argument to %qE attribute is neither zero, nor one",
41156 *no_add_attrs = true;
41165 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
41166 struct attribute_spec.handler. */
41168 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
41169 bool *no_add_attrs)
41171 if (TREE_CODE (*node) != FUNCTION_TYPE
41172 && TREE_CODE (*node) != METHOD_TYPE
41173 && TREE_CODE (*node) != FIELD_DECL
41174 && TREE_CODE (*node) != TYPE_DECL)
41176 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41178 *no_add_attrs = true;
/* The ms_abi and sysv_abi attributes are mutually exclusive.  */
41183 if (is_attribute_p ("ms_abi", name))
41185 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
41187 error ("ms_abi and sysv_abi attributes are not compatible");
41192 else if (is_attribute_p ("sysv_abi", name))
41194 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
41196 error ("ms_abi and sysv_abi attributes are not compatible");
41205 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
41206 struct attribute_spec.handler. */
41208 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
41209 bool *no_add_attrs)
41212 if (DECL_P (*node))
41214 if (TREE_CODE (*node) == TYPE_DECL)
41215 type = &TREE_TYPE (*node);
41220 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
41222 warning (OPT_Wattributes, "%qE attribute ignored",
41224 *no_add_attrs = true;
41227 else if ((is_attribute_p ("ms_struct", name)
41228 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
41229 || ((is_attribute_p ("gcc_struct", name)
41230 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
41232 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
41234 *no_add_attrs = true;
41241 ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int,
41242 bool *no_add_attrs)
41244 if (TREE_CODE (*node) != FUNCTION_DECL)
41246 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41248 *no_add_attrs = true;
41251 if (is_attribute_p ("indirect_branch", name))
41253 tree cst = TREE_VALUE (args);
41254 if (TREE_CODE (cst) != STRING_CST)
41256 warning (OPT_Wattributes,
41257 "%qE attribute requires a string constant argument",
41259 *no_add_attrs = true;
41261 else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
41262 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
41263 && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
41264 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
41266 warning (OPT_Wattributes,
41267 "argument to %qE attribute is not "
41268 "(keep|thunk|thunk-inline|thunk-extern)", name);
41269 *no_add_attrs = true;
41273 if (is_attribute_p ("function_return", name))
41275 tree cst = TREE_VALUE (args);
41276 if (TREE_CODE (cst) != STRING_CST)
41278 warning (OPT_Wattributes,
41279 "%qE attribute requires a string constant argument",
41281 *no_add_attrs = true;
41283 else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
41284 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
41285 && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
41286 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
41288 warning (OPT_Wattributes,
41289 "argument to %qE attribute is not "
41290 "(keep|thunk|thunk-inline|thunk-extern)", name);
41291 *no_add_attrs = true;
41299 ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree,
41306 ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *)
41308 /* DECL_RESULT and DECL_ARGUMENTS do not exist there yet,
41309 but the function type contains args and return type data. */
41310 tree func_type = *node;
41311 tree return_type = TREE_TYPE (func_type);
41314 tree current_arg_type = TYPE_ARG_TYPES (func_type);
41315 while (current_arg_type
41316 && ! VOID_TYPE_P (TREE_VALUE (current_arg_type)))
41320 if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type)))
41321 error ("interrupt service routine should have a pointer "
41322 "as the first argument");
41324 else if (nargs == 1)
41326 if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE
41327 || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode)
41328 error ("interrupt service routine should have unsigned %s"
41329 "int as the second argument",
41331 ? (TARGET_X32 ? "long long " : "long ")
41335 current_arg_type = TREE_CHAIN (current_arg_type);
41337 if (!nargs || nargs > 2)
41338 error ("interrupt service routine can only have a pointer argument "
41339 "and an optional integer argument");
if (! VOID_TYPE_P (return_type))
  error ("interrupt service routine cannot have non-void return value");

return NULL_TREE;
}
41347 ix86_ms_bitfield_layout_p (const_tree record_type)
41349 return ((TARGET_MS_BITFIELD_LAYOUT
41350 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
41351 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
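/* A hedged illustration of the layout difference decided above: given

     struct { char a : 4; int b : 4; } s;

   ms_struct layout starts "b" in a fresh, int-aligned storage unit
   (typically sizeof (s) == 8), while the default gcc_struct layout packs
   both bitfields into adjacent bits of the first storage unit.  */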
41354 /* Returns an expression indicating where the this parameter is
41355 located on entry to the FUNCTION. */
41358 x86_this_parameter (tree function)
41360 tree type = TREE_TYPE (function);
41361 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
41366 const int *parm_regs;
41368 if (ix86_function_type_abi (type) == MS_ABI)
41369 parm_regs = x86_64_ms_abi_int_parameter_registers;
41371 parm_regs = x86_64_int_parameter_registers;
41372 return gen_rtx_REG (Pmode, parm_regs[aggr]);
41375 nregs = ix86_function_regparm (type, function);
41377 if (nregs > 0 && !stdarg_p (type))
41380 unsigned int ccvt = ix86_get_callcvt (type);
41382 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
41383 regno = aggr ? DX_REG : CX_REG;
41384 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
41388 return gen_rtx_MEM (SImode,
41389 plus_constant (Pmode, stack_pointer_rtx, 4));
41398 return gen_rtx_MEM (SImode,
41399 plus_constant (Pmode,
41400 stack_pointer_rtx, 4));
41403 return gen_rtx_REG (SImode, regno);
41406 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
41410 /* Determine whether x86_output_mi_thunk can succeed. */
41413 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
41414 const_tree function)
41416 /* 64-bit can handle anything. */
41420 /* For 32-bit, everything's fine if we have one free register. */
41421 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
41424 /* Need a free register for vcall_offset. */
41428 /* Need a free register for GOT references. */
41429 if (flag_pic && !targetm.binds_local_p (function))
41432 /* Otherwise ok. */
41436 /* Output the assembler code for a thunk function. THUNK_DECL is the
41437 declaration for the thunk function itself, FUNCTION is the decl for
41438 the target function. DELTA is an immediate constant offset to be
41439 added to THIS. If VCALL_OFFSET is nonzero, the word at
41440 *(*this + vcall_offset) should be added to THIS. */
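/* In C terms, the emitted thunk adjusts the this pointer roughly as in
   this sketch (not the actual RTL sequence):

     char *p = (char *) this + delta;
     if (vcall_offset)
       p += *(ptrdiff_t *) (*(char **) p + vcall_offset);
     return function (p, ...);	// tail call
*/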
41443 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
41444 HOST_WIDE_INT vcall_offset, tree function)
41446 rtx this_param = x86_this_parameter (function);
41447 rtx this_reg, tmp, fnaddr;
41448 unsigned int tmp_regno;
41452 tmp_regno = R10_REG;
41455 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
41456 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
41457 tmp_regno = AX_REG;
41458 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
41459 tmp_regno = DX_REG;
41461 tmp_regno = CX_REG;
41464 emit_note (NOTE_INSN_PROLOGUE_END);
  /* If CET is enabled, insert an ENDBR instruction.  */
41467 if ((flag_cf_protection & CF_BRANCH))
41468 emit_insn (gen_nop_endbr ());
41470 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
41471 pull it in now and let DELTA benefit. */
41472 if (REG_P (this_param))
41473 this_reg = this_param;
41474 else if (vcall_offset)
41476 /* Put the this parameter into %eax. */
41477 this_reg = gen_rtx_REG (Pmode, AX_REG);
41478 emit_move_insn (this_reg, this_param);
41481 this_reg = NULL_RTX;
41483 /* Adjust the this parameter by a fixed constant. */
41486 rtx delta_rtx = GEN_INT (delta);
41487 rtx delta_dst = this_reg ? this_reg : this_param;
41491 if (!x86_64_general_operand (delta_rtx, Pmode))
41493 tmp = gen_rtx_REG (Pmode, tmp_regno);
41494 emit_move_insn (tmp, delta_rtx);
41499 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
41502 /* Adjust the this parameter by a value stored in the vtable. */
41505 rtx vcall_addr, vcall_mem, this_mem;
41507 tmp = gen_rtx_REG (Pmode, tmp_regno);
41509 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
41510 if (Pmode != ptr_mode)
41511 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
41512 emit_move_insn (tmp, this_mem);
41514 /* Adjust the this parameter. */
41515 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
41517 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
41519 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
41520 emit_move_insn (tmp2, GEN_INT (vcall_offset));
41521 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
41524 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
41525 if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
41531 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
41534 /* If necessary, drop THIS back to its stack slot. */
41535 if (this_reg && this_reg != this_param)
41536 emit_move_insn (this_param, this_reg);
41538 fnaddr = XEXP (DECL_RTL (function), 0);
41541 if (!flag_pic || targetm.binds_local_p (function)
41546 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
41547 tmp = gen_rtx_CONST (Pmode, tmp);
41548 fnaddr = gen_const_mem (Pmode, tmp);
41553 if (!flag_pic || targetm.binds_local_p (function))
41556 else if (TARGET_MACHO)
41558 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
41559 fnaddr = XEXP (fnaddr, 0);
41561 #endif /* TARGET_MACHO */
41564 tmp = gen_rtx_REG (Pmode, CX_REG);
41565 output_set_got (tmp, NULL_RTX);
41567 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
41568 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
41569 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
41570 fnaddr = gen_const_mem (Pmode, fnaddr);
41574 /* Our sibling call patterns do not allow memories, because we have no
41575 predicate that can distinguish between frame and non-frame memory.
41576 For our purposes here, we can get away with (ab)using a jump pattern,
41577 because we're going to do no optimization. */
41578 if (MEM_P (fnaddr))
41580 if (sibcall_insn_operand (fnaddr, word_mode))
41582 fnaddr = XEXP (DECL_RTL (function), 0);
41583 tmp = gen_rtx_MEM (QImode, fnaddr);
41584 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
41585 tmp = emit_call_insn (tmp);
41586 SIBLING_CALL_P (tmp) = 1;
41589 emit_jump_insn (gen_indirect_jump (fnaddr));
41593 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
41595 // CM_LARGE_PIC always uses pseudo PIC register which is
41596 // uninitialized. Since FUNCTION is local and calling it
41597 // doesn't go through PLT, we use scratch register %r11 as
41598 // PIC register and initialize it here.
41599 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
41600 ix86_init_large_pic_reg (tmp_regno);
41601 fnaddr = legitimize_pic_address (fnaddr,
41602 gen_rtx_REG (Pmode, tmp_regno));
41605 if (!sibcall_insn_operand (fnaddr, word_mode))
41607 tmp = gen_rtx_REG (word_mode, tmp_regno);
41608 if (GET_MODE (fnaddr) != word_mode)
41609 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
41610 emit_move_insn (tmp, fnaddr);
41614 tmp = gen_rtx_MEM (QImode, fnaddr);
41615 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
41616 tmp = emit_call_insn (tmp);
41617 SIBLING_CALL_P (tmp) = 1;
41621 /* Emit just enough of rest_of_compilation to get the insns emitted.
41622 Note that use_thunk calls assemble_start_function et al. */
41623 insn = get_insns ();
41624 shorten_branches (insn);
41625 final_start_function (insn, file, 1);
41626 final (insn, file, 1);
41627 final_end_function ();
41631 x86_file_start (void)
41633 default_file_start ();
41635 fputs ("\t.code16gcc\n", asm_out_file);
41637 darwin_file_start ();
41639 if (X86_FILE_START_VERSION_DIRECTIVE)
41640 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
41641 if (X86_FILE_START_FLTUSED)
41642 fputs ("\t.global\t__fltused\n", asm_out_file);
41643 if (ix86_asm_dialect == ASM_INTEL)
41644 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
41648 x86_field_alignment (tree type, int computed)
41652 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
41655 return iamcu_alignment (type, computed);
41656 mode = TYPE_MODE (strip_array_types (type));
41657 if (mode == DFmode || mode == DCmode
41658 || GET_MODE_CLASS (mode) == MODE_INT
41659 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
41660 return MIN (32, computed);
41664 /* Print call to TARGET to FILE. */
41667 x86_print_call_or_nop (FILE *file, const char *target)
41669 if (flag_nop_mcount || !strcmp (target, "nop"))
41670 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
41671 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
41673 fprintf (file, "1:\tcall\t%s\n", target);
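  /* So with mcount profiling this emits either
       1:	call	mcount
     or, under -mnop-mcount, a patchable 5 byte nop in its place.  */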
41677 current_fentry_name (const char **name)
41679 tree attr = lookup_attribute ("fentry_name",
41680 DECL_ATTRIBUTES (current_function_decl));
41683 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
41688 current_fentry_section (const char **name)
41690 tree attr = lookup_attribute ("fentry_section",
41691 DECL_ATTRIBUTES (current_function_decl));
41694 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
41698 /* Output assembler code to FILE to increment profiler label # LABELNO
41699 for profiling a function entry. */
41701 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
41703 if (cfun->machine->endbr_queued_at_entrance)
41704 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
41706 const char *mcount_name = MCOUNT_NAME;
41708 if (current_fentry_name (&mcount_name))
41710 else if (fentry_name)
41711 mcount_name = fentry_name;
41712 else if (flag_fentry)
41713 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
41717 #ifndef NO_PROFILE_COUNTERS
41718 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
41721 if (!TARGET_PECOFF && flag_pic)
41722 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
41724 x86_print_call_or_nop (file, mcount_name);
41728 #ifndef NO_PROFILE_COUNTERS
41729 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
41732 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
41736 #ifndef NO_PROFILE_COUNTERS
41737 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
41740 x86_print_call_or_nop (file, mcount_name);
41743 if (flag_record_mcount
41744 || lookup_attribute ("fentry_section",
41745 DECL_ATTRIBUTES (current_function_decl)))
41747 const char *sname = "__mcount_loc";
41749 if (current_fentry_section (&sname))
41751 else if (fentry_section)
41752 sname = fentry_section;
41754 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
41755 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
41756 fprintf (file, "\t.previous\n");
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
ix86_min_insn_size (rtx_insn *insn)
41770 if (!INSN_P (insn) || !active_insn_p (insn))
  /* Discard alignments we've emitted and jump instructions.  */
41774 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
41775 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
41781 && symbolic_reference_mentioned_p (PATTERN (insn))
41782 && !SIBLING_CALL_P (insn))
41784 len = get_attr_length (insn);
41788 /* For normal instructions we rely on get_attr_length being exact,
41789 with a few exceptions. */
41790 if (!JUMP_P (insn))
41792 enum attr_type type = get_attr_type (insn);
41797 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
41798 || asm_noperands (PATTERN (insn)) >= 0)
41805 /* Otherwise trust get_attr_length. */
41809 l = get_attr_length_address (insn);
41810 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
41819 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
41827 rtx_insn *insn, *start = get_insns ();
41828 int nbytes = 0, njumps = 0;
41829 bool isjump = false;
41831 /* Look for all minimal intervals of instructions containing 4 jumps.
41832 The intervals are bounded by START and INSN. NBYTES is the total
41833 size of instructions in the interval including INSN and not including
   START.  When NBYTES is smaller than 16 bytes, it is possible that the
   end of START and INSN end up in the same 16 byte page.
41837 The smallest offset in the page INSN can start is the case where START
41838 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
   We add p2align to a 16 byte window with maxskip 15 - NBYTES + sizeof (INSN).
41841 Don't consider asm goto as jump, while it can contain a jump, it doesn't
41842 have to, control transfer to label(s) can be performed through other
41843 means, and also we estimate minimum length of all asm stmts as 0. */
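  /* A worked instance of the formula above: with NBYTES = 12 and
     sizeof (INSN) = 2, INSN can start no earlier than offset 10 in the
     page, so a p2align with maxskip 15 - 12 + 2 = 5 guarantees that the
     four jumps cannot all land in one 16 byte window.  */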
41844 for (insn = start; insn; insn = NEXT_INSN (insn))
41848 if (LABEL_P (insn))
41850 align_flags alignment = label_to_alignment (insn);
41851 int align = alignment.levels[0].log;
41852 int max_skip = alignment.levels[0].maxskip;
41856 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
41857 already in the current 16 byte page, because otherwise
41858 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
41859 bytes to reach 16 byte boundary. */
41861 || (align <= 3 && max_skip != (1 << align) - 1))
41864 fprintf (dump_file, "Label %i with max_skip %i\n",
41865 INSN_UID (insn), max_skip);
41868 while (nbytes + max_skip >= 16)
41870 start = NEXT_INSN (start);
41871 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
41873 njumps--, isjump = true;
41876 nbytes -= ix86_min_insn_size (start);
41882 min_size = ix86_min_insn_size (insn);
41883 nbytes += min_size;
41885 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
41886 INSN_UID (insn), min_size);
41887 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
41895 start = NEXT_INSN (start);
41896 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
41898 njumps--, isjump = true;
41901 nbytes -= ix86_min_insn_size (start);
41903 gcc_assert (njumps >= 0);
41905 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
41906 INSN_UID (start), INSN_UID (insn), nbytes);
41908 if (njumps == 3 && isjump && nbytes < 16)
41910 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
41913 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
41914 INSN_UID (insn), padsize);
41915 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
/* AMD Athlon runs faster when RET is not the destination of a conditional
   jump or directly preceded by another jump instruction.  We avoid the
   penalty by inserting a NOP just before the RET instructions in such
   cases.  */
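/* The replacement emitted below is the two byte "rep ret": the redundant
   REP prefix is architecturally ignored on RET, so it changes only the
   instruction's length and predictor behavior, not its semantics.  */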
41926 ix86_pad_returns (void)
41931 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
41933 basic_block bb = e->src;
41934 rtx_insn *ret = BB_END (bb);
41936 bool replace = false;
41938 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
41939 || optimize_bb_for_size_p (bb))
41941 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
41942 if (active_insn_p (prev) || LABEL_P (prev))
41944 if (prev && LABEL_P (prev))
41949 FOR_EACH_EDGE (e, ei, bb->preds)
41950 if (EDGE_FREQUENCY (e) && e->src->index >= 0
41951 && !(e->flags & EDGE_FALLTHRU))
41959 prev = prev_active_insn (ret);
41961 && ((JUMP_P (prev) && any_condjump_p (prev))
      /* Empty functions suffer a branch mispredict even when the jump
	 destination is not visible to us.  */
41966 if (!prev && !optimize_function_for_size_p (cfun))
41971 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
41977 /* Count the minimum number of instructions in BB. Return 4 if the
41978 number of instructions >= 4. */
41981 ix86_count_insn_bb (basic_block bb)
41984 int insn_count = 0;
41986 /* Count number of instructions in this block. Return 4 if the number
41987 of instructions >= 4. */
41988 FOR_BB_INSNS (bb, insn)
      /* This can only happen in exit blocks.  */
41992 && ANY_RETURN_P (PATTERN (insn)))
41995 if (NONDEBUG_INSN_P (insn)
41996 && GET_CODE (PATTERN (insn)) != USE
41997 && GET_CODE (PATTERN (insn)) != CLOBBER)
42000 if (insn_count >= 4)
42009 /* Count the minimum number of instructions in code path in BB.
42010 Return 4 if the number of instructions >= 4. */
42013 ix86_count_insn (basic_block bb)
42017 int min_prev_count;
42019 /* Only bother counting instructions along paths with no
42020 more than 2 basic blocks between entry and exit. Given
42021 that BB has an edge to exit, determine if a predecessor
42022 of BB has an edge from entry. If so, compute the number
42023 of instructions in the predecessor block. If there
42024 happen to be multiple such blocks, compute the minimum. */
42025 min_prev_count = 4;
42026 FOR_EACH_EDGE (e, ei, bb->preds)
42029 edge_iterator prev_ei;
42031 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
42033 min_prev_count = 0;
42036 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
42038 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
42040 int count = ix86_count_insn_bb (e->src);
42041 if (count < min_prev_count)
42042 min_prev_count = count;
42048 if (min_prev_count < 4)
42049 min_prev_count += ix86_count_insn_bb (bb);
42051 return min_prev_count;
42054 /* Pad short function to 4 instructions. */
42057 ix86_pad_short_function (void)
42062 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42064 rtx_insn *ret = BB_END (e->src);
42065 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
42067 int insn_count = ix86_count_insn (e->src);
42069 /* Pad short function. */
42070 if (insn_count < 4)
42072 rtx_insn *insn = ret;
42074 /* Find epilogue. */
42077 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
42078 insn = PREV_INSN (insn);
42083 /* Two NOPs count as one instruction. */
42084 insn_count = 2 * (4 - insn_count);
42085 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
42091 /* Fix up a Windows system unwinder issue. If an EH region falls through into
42092 the epilogue, the Windows system unwinder will apply epilogue logic and
42093 produce incorrect offsets. This can be avoided by adding a nop between
42094 the last insn that can throw and the first insn of the epilogue. */
42097 ix86_seh_fixup_eh_fallthru (void)
42102 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42104 rtx_insn *insn, *next;
42106 /* Find the beginning of the epilogue. */
42107 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
42108 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
42113 /* We only care about preceding insns that can throw. */
42114 insn = prev_active_insn (insn);
42115 if (insn == NULL || !can_throw_internal (insn))
42118 /* Do not separate calls from their debug information. */
42119 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
42120 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
42125 emit_insn_after (gen_nops (const1_rtx), insn);
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs, and a pass to avoid placing 4 jumps in a single 16 byte
   window.  */
42134 /* We are freeing block_for_insn in the toplev to keep compatibility
42135 with old MDEP_REORGS that are not CFG based. Recompute it now. */
42136 compute_bb_for_insn ();
42138 if (TARGET_SEH && current_function_has_exception_handlers ())
42139 ix86_seh_fixup_eh_fallthru ();
42141 if (optimize && optimize_function_for_speed_p (cfun))
42143 if (TARGET_PAD_SHORT_FUNCTION)
42144 ix86_pad_short_function ();
42145 else if (TARGET_PAD_RETURNS)
42146 ix86_pad_returns ();
42147 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
42148 if (TARGET_FOUR_JUMP_LIMIT)
42149 ix86_avoid_jump_mispredicts ();
/* Return nonzero when a QImode register that must be represented via a REX
   prefix is used.  */
42157 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
42160 extract_insn_cached (insn);
42161 for (i = 0; i < recog_data.n_operands; i++)
42162 if (GENERAL_REG_P (recog_data.operand[i])
42163 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
/* Return true when INSN mentions a register that must be encoded using a
   REX prefix.  */
42171 x86_extended_reg_mentioned_p (rtx insn)
42173 subrtx_iterator::array_type array;
42174 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
42176 const_rtx x = *iter;
42178 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
42184 /* If profitable, negate (without causing overflow) integer constant
42185 of mode MODE at location LOC. Return true in this case. */
42187 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
42191 if (!CONST_INT_P (*loc))
42197 /* DImode x86_64 constants must fit in 32 bits. */
42198 gcc_assert (x86_64_immediate_operand (*loc, mode));
42209 gcc_unreachable ();
42212 /* Avoid overflows. */
42213 if (mode_signbit_p (mode, *loc))
42216 val = INTVAL (*loc);
42218 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
42219 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
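  /* Worked example of the exception: adding 128 is better emitted as
     "subl $-128, %eax" (sign-extended imm8) than "addl $128, %eax"
     (imm32), while -128 itself is left alone, since "subl $128, %eax"
     would need the wider immediate.  */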
42220 if ((val < 0 && val != -128)
42223 *loc = GEN_INT (-val);
42230 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
42231 optabs would emit if we didn't have TFmode patterns. */
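/* Conceptually the emitted sequence computes, for the DImode case, what
   this hedged C sketch does:

     double floatuns (unsigned long long x)
     {
       if ((long long) x >= 0)
	 return (double) (long long) x;
       // Halve with the low bit folded back in (round to odd), convert
       // as signed, then double the result.
       unsigned long long half = (x >> 1) | (x & 1);
       double f = (double) (long long) half;
       return f + f;
     }
*/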
42234 x86_emit_floatuns (rtx operands[2])
42236 rtx_code_label *neglab, *donelab;
42237 rtx i0, i1, f0, in, out;
42238 machine_mode mode, inmode;
42240 inmode = GET_MODE (operands[1]);
42241 gcc_assert (inmode == SImode || inmode == DImode);
42244 in = force_reg (inmode, operands[1]);
42245 mode = GET_MODE (out);
42246 neglab = gen_label_rtx ();
42247 donelab = gen_label_rtx ();
42248 f0 = gen_reg_rtx (mode);
42250 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
42252 expand_float (out, in, 0);
42254 emit_jump_insn (gen_jump (donelab));
42257 emit_label (neglab);
42259 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
42261 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
42263 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
42265 expand_float (f0, i0, 0);
42267 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
42269 emit_label (donelab);
42272 static bool canonicalize_perm (struct expand_vec_perm_d *d);
42273 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
42274 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
42275 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
42277 /* Get a vector mode of the same size as the original but with elements
42278 twice as wide. This is only guaranteed to apply to integral vectors. */
42280 static inline machine_mode
42281 get_mode_wider_vector (machine_mode o)
42283 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
42284 machine_mode n = GET_MODE_WIDER_MODE (o).require ();
42285 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
42286 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
42290 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
42291 fill target with val via vec_duplicate. */
42294 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
42300 /* First attempt to recognize VAL as-is. */
42301 dup = gen_vec_duplicate (mode, val);
42302 insn = emit_insn (gen_rtx_SET (target, dup));
42303 if (recog_memoized (insn) < 0)
42306 machine_mode innermode = GET_MODE_INNER (mode);
42309 /* If that fails, force VAL into a register. */
42312 reg = force_reg (innermode, val);
42313 if (GET_MODE (reg) != innermode)
42314 reg = gen_lowpart (innermode, reg);
42315 SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
42316 seq = get_insns ();
42319 emit_insn_before (seq, insn);
42321 ok = recog_memoized (insn) >= 0;
42327 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
   with all elements equal to VAL.  Return true if successful.  */
42331 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
42332 rtx target, rtx val)
42356 return ix86_vector_duplicate_value (mode, target, val);
42361 if (TARGET_SSE || TARGET_3DNOW_A)
42365 val = gen_lowpart (SImode, val);
42366 x = gen_rtx_TRUNCATE (HImode, val);
42367 x = gen_rtx_VEC_DUPLICATE (mode, x);
42368 emit_insn (gen_rtx_SET (target, x));
42380 return ix86_vector_duplicate_value (mode, target, val);
42384 struct expand_vec_perm_d dperm;
42388 memset (&dperm, 0, sizeof (dperm));
42389 dperm.target = target;
42390 dperm.vmode = mode;
42391 dperm.nelt = GET_MODE_NUNITS (mode);
42392 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
42393 dperm.one_operand_p = true;
42395 /* Extend to SImode using a paradoxical SUBREG. */
42396 tmp1 = gen_reg_rtx (SImode);
42397 emit_move_insn (tmp1, gen_lowpart (SImode, val));
42399 /* Insert the SImode value as low element of a V4SImode vector. */
42400 tmp2 = gen_reg_rtx (V4SImode);
42401 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
42402 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
42404 ok = (expand_vec_perm_1 (&dperm)
42405 || expand_vec_perm_broadcast_1 (&dperm));
42413 return ix86_vector_duplicate_value (mode, target, val);
42420 /* Replicate the value once into the next wider mode and recurse. */
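      /* E.g. broadcasting a QImode byte B first forms the HImode value
	 (B << 8) | B, broadcasts that into the vector with half as many
	 elements, and reinterprets the result in the original mode.  */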
42422 machine_mode smode, wsmode, wvmode;
42425 smode = GET_MODE_INNER (mode);
42426 wvmode = get_mode_wider_vector (mode);
42427 wsmode = GET_MODE_INNER (wvmode);
42429 val = convert_modes (wsmode, smode, val, true);
42430 x = expand_simple_binop (wsmode, ASHIFT, val,
42431 GEN_INT (GET_MODE_BITSIZE (smode)),
42432 NULL_RTX, 1, OPTAB_LIB_WIDEN);
42433 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
42435 x = gen_reg_rtx (wvmode);
42436 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
42438 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
42445 return ix86_vector_duplicate_value (mode, target, val);
42448 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
42449 rtx x = gen_reg_rtx (hvmode);
42451 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
42454 x = gen_rtx_VEC_CONCAT (mode, x, x);
42455 emit_insn (gen_rtx_SET (target, x));
42461 if (TARGET_AVX512BW)
42462 return ix86_vector_duplicate_value (mode, target, val);
42465 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
42466 rtx x = gen_reg_rtx (hvmode);
42468 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
42471 x = gen_rtx_VEC_CONCAT (mode, x, x);
42472 emit_insn (gen_rtx_SET (target, x));
42481 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
42482 whose ONE_VAR element is VAR, and other elements are zero. Return true
42486 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
42487 rtx target, rtx var, int one_var)
42489 machine_mode vsimode;
42492 bool use_vector_set = false;
42493 rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
42501 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
42502 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
42508 use_vector_set = TARGET_SSE4_1;
42511 use_vector_set = TARGET_SSE2;
42514 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
42518 use_vector_set = TARGET_AVX;
42521 use_vector_set = TARGET_AVX;
42522 gen_vec_set_0 = gen_vec_setv8si_0;
42525 use_vector_set = TARGET_AVX;
42526 gen_vec_set_0 = gen_vec_setv8sf_0;
42529 use_vector_set = TARGET_AVX;
42530 gen_vec_set_0 = gen_vec_setv4df_0;
42533 /* Use ix86_expand_vector_set in 64bit mode only. */
42534 use_vector_set = TARGET_AVX && TARGET_64BIT;
42535 gen_vec_set_0 = gen_vec_setv4di_0;
42538 use_vector_set = TARGET_AVX512F && one_var == 0;
42539 gen_vec_set_0 = gen_vec_setv16si_0;
42542 use_vector_set = TARGET_AVX512F && one_var == 0;
42543 gen_vec_set_0 = gen_vec_setv16sf_0;
42546 use_vector_set = TARGET_AVX512F && one_var == 0;
42547 gen_vec_set_0 = gen_vec_setv8df_0;
42550 /* Use ix86_expand_vector_set in 64bit mode only. */
42551 use_vector_set = TARGET_AVX512F && TARGET_64BIT && one_var == 0;
42552 gen_vec_set_0 = gen_vec_setv8di_0;
42558 if (use_vector_set)
42560 if (gen_vec_set_0 && one_var == 0)
42562 var = force_reg (GET_MODE_INNER (mode), var);
42563 emit_insn (gen_vec_set_0 (target, CONST0_RTX (mode), var));
42566 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
42567 var = force_reg (GET_MODE_INNER (mode), var);
42568 ix86_expand_vector_set (mmx_ok, target, var, one_var);
42584 var = force_reg (GET_MODE_INNER (mode), var);
42585 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
42586 emit_insn (gen_rtx_SET (target, x));
42591 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
42592 new_target = gen_reg_rtx (mode);
42594 new_target = target;
42595 var = force_reg (GET_MODE_INNER (mode), var);
42596 x = gen_rtx_VEC_DUPLICATE (mode, var);
42597 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
42598 emit_insn (gen_rtx_SET (new_target, x));
42601 /* We need to shuffle the value to the correct position, so
42602 create a new pseudo to store the intermediate result. */
42604 /* With SSE2, we can use the integer shuffle insns. */
42605 if (mode != V4SFmode && TARGET_SSE2)
42607 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
42609 GEN_INT (one_var == 1 ? 0 : 1),
42610 GEN_INT (one_var == 2 ? 0 : 1),
42611 GEN_INT (one_var == 3 ? 0 : 1)));
42612 if (target != new_target)
42613 emit_move_insn (target, new_target);
42617 /* Otherwise convert the intermediate result to V4SFmode and
42618 use the SSE1 shuffle instructions. */
42619 if (mode != V4SFmode)
42621 tmp = gen_reg_rtx (V4SFmode);
42622 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
42627 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
42629 GEN_INT (one_var == 1 ? 0 : 1),
42630 GEN_INT (one_var == 2 ? 0+4 : 1+4),
42631 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
42633 if (mode != V4SFmode)
42634 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
42635 else if (tmp != target)
42636 emit_move_insn (target, tmp);
42638 else if (target != new_target)
42639 emit_move_insn (target, new_target);
42644 vsimode = V4SImode;
42650 vsimode = V2SImode;
42656 /* Zero extend the variable element to SImode and recurse. */
42657 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
42659 x = gen_reg_rtx (vsimode);
42660 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
42662 gcc_unreachable ();
42664 emit_move_insn (target, gen_lowpart (mode, x));
42672 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
42673 consisting of the values in VALS. It is known that all elements
42674 except ONE_VAR are constants. Return true if successful. */
42677 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
42678 rtx target, rtx vals, int one_var)
42680 rtx var = XVECEXP (vals, 0, one_var);
42681 machine_mode wmode;
42684 const_vec = copy_rtx (vals);
42685 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
42686 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
42694 /* For the two element vectors, it's just as easy to use
42695 the general case. */
42699 /* Use ix86_expand_vector_set in 64bit mode only. */
42723 /* There's no way to set one QImode entry easily. Combine
42724 the variable value with its adjacent constant value, and
42725 promote to an HImode set. */
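	/* E.g. setting byte 3 of a V16QImode vector combines bytes 2 and
	   3 into one HImode value and performs the set at HImode element
	   1 (= 3 >> 1) of the V8HImode view instead.  */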
42726 x = XVECEXP (vals, 0, one_var ^ 1);
42729 var = convert_modes (HImode, QImode, var, true);
42730 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
42731 NULL_RTX, 1, OPTAB_LIB_WIDEN);
42732 x = GEN_INT (INTVAL (x) & 0xff);
42736 var = convert_modes (HImode, QImode, var, true);
42737 x = gen_int_mode (UINTVAL (x) << 8, HImode);
42739 if (x != const0_rtx)
42740 var = expand_simple_binop (HImode, IOR, var, x, var,
42741 1, OPTAB_LIB_WIDEN);
42743 x = gen_reg_rtx (wmode);
42744 emit_move_insn (x, gen_lowpart (wmode, const_vec));
42745 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
42747 emit_move_insn (target, gen_lowpart (mode, x));
42754 emit_move_insn (target, const_vec);
42755 ix86_expand_vector_set (mmx_ok, target, var, one_var);
42759 /* A subroutine of ix86_expand_vector_init_general. Use vector
42760 concatenate to handle the most general case: all values variable,
42761 and none identical. */
42764 ix86_expand_vector_init_concat (machine_mode mode,
42765 rtx target, rtx *ops, int n)
42767 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
42768 rtx first[16], second[8], third[4];
42820 gcc_unreachable ();
42823 if (!register_operand (ops[1], cmode))
42824 ops[1] = force_reg (cmode, ops[1]);
42825 if (!register_operand (ops[0], cmode))
42826 ops[0] = force_reg (cmode, ops[0]);
42827 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
42847 gcc_unreachable ();
42871 gcc_unreachable ();
42889 gcc_unreachable ();
42894 /* FIXME: We process inputs backward to help RA. PR 36222. */
42897 for (; i > 0; i -= 2, j--)
42899 first[j] = gen_reg_rtx (cmode);
42900 v = gen_rtvec (2, ops[i - 1], ops[i]);
42901 ix86_expand_vector_init (false, first[j],
42902 gen_rtx_PARALLEL (cmode, v));
42908 gcc_assert (hmode != VOIDmode);
42909 gcc_assert (gmode != VOIDmode);
42910 for (i = j = 0; i < n; i += 2, j++)
42912 second[j] = gen_reg_rtx (hmode);
42913 ix86_expand_vector_init_concat (hmode, second [j],
42917 for (i = j = 0; i < n; i += 2, j++)
42919 third[j] = gen_reg_rtx (gmode);
42920 ix86_expand_vector_init_concat (gmode, third[j],
42924 ix86_expand_vector_init_concat (mode, target, third, n);
42928 gcc_assert (hmode != VOIDmode);
42929 for (i = j = 0; i < n; i += 2, j++)
42931 second[j] = gen_reg_rtx (hmode);
42932 ix86_expand_vector_init_concat (hmode, second [j],
42936 ix86_expand_vector_init_concat (mode, target, second, n);
42939 ix86_expand_vector_init_concat (mode, target, first, n);
42943 gcc_unreachable ();
42947 /* A subroutine of ix86_expand_vector_init_general. Use vector
42948 interleave to handle the most general case: all values variable,
42949 and none identical. */
42952 ix86_expand_vector_init_interleave (machine_mode mode,
42953 rtx target, rtx *ops, int n)
42955 machine_mode first_imode, second_imode, third_imode, inner_mode;
42958 rtx (*gen_load_even) (rtx, rtx, rtx);
42959 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
42960 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
42965 gen_load_even = gen_vec_setv8hi;
42966 gen_interleave_first_low = gen_vec_interleave_lowv4si;
42967 gen_interleave_second_low = gen_vec_interleave_lowv2di;
42968 inner_mode = HImode;
42969 first_imode = V4SImode;
42970 second_imode = V2DImode;
42971 third_imode = VOIDmode;
42974 gen_load_even = gen_vec_setv16qi;
42975 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
42976 gen_interleave_second_low = gen_vec_interleave_lowv4si;
42977 inner_mode = QImode;
42978 first_imode = V8HImode;
42979 second_imode = V4SImode;
42980 third_imode = V2DImode;
42983 gcc_unreachable ();
42986 for (i = 0; i < n; i++)
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
42989 op0 = gen_reg_rtx (SImode);
42990 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
42992 /* Insert the SImode value as low element of V4SImode vector. */
42993 op1 = gen_reg_rtx (V4SImode);
42994 op0 = gen_rtx_VEC_MERGE (V4SImode,
42995 gen_rtx_VEC_DUPLICATE (V4SImode,
42997 CONST0_RTX (V4SImode),
42999 emit_insn (gen_rtx_SET (op1, op0));
      /* Cast the V4SImode vector back to a vector in the original mode.  */
43002 op0 = gen_reg_rtx (mode);
43003 emit_move_insn (op0, gen_lowpart (mode, op1));
43005 /* Load even elements into the second position. */
43006 emit_insn (gen_load_even (op0,
43007 force_reg (inner_mode,
43011 /* Cast vector to FIRST_IMODE vector. */
43012 ops[i] = gen_reg_rtx (first_imode);
43013 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
43016 /* Interleave low FIRST_IMODE vectors. */
43017 for (i = j = 0; i < n; i += 2, j++)
43019 op0 = gen_reg_rtx (first_imode);
43020 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
43022 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
43023 ops[j] = gen_reg_rtx (second_imode);
43024 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
43027 /* Interleave low SECOND_IMODE vectors. */
43028 switch (second_imode)
43031 for (i = j = 0; i < n / 2; i += 2, j++)
43033 op0 = gen_reg_rtx (second_imode);
43034 emit_insn (gen_interleave_second_low (op0, ops[i],
	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector.  */
43039 ops[j] = gen_reg_rtx (third_imode);
43040 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
43042 second_imode = V2DImode;
43043 gen_interleave_second_low = gen_vec_interleave_lowv2di;
43047 op0 = gen_reg_rtx (second_imode);
43048 emit_insn (gen_interleave_second_low (op0, ops[0],
      /* Cast the SECOND_IMODE vector back to a vector in the original
	 mode.  */
43053 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
43057 gcc_unreachable ();
43061 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
43062 all values variable, and none identical. */
43065 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
43066 rtx target, rtx vals)
43068 rtx ops[64], op0, op1, op2, op3, op4, op5;
43069 machine_mode half_mode = VOIDmode;
43070 machine_mode quarter_mode = VOIDmode;
43077 if (!mmx_ok && !TARGET_SSE)
43093 n = GET_MODE_NUNITS (mode);
43094 for (i = 0; i < n; i++)
43095 ops[i] = XVECEXP (vals, 0, i);
43096 ix86_expand_vector_init_concat (mode, target, ops, n);
43100 for (i = 0; i < 2; i++)
43101 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
43102 op0 = gen_reg_rtx (V4DImode);
43103 ix86_expand_vector_init_concat (V4DImode, op0, ops, 2);
43104 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
43108 for (i = 0; i < 4; i++)
43109 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
43110 ops[4] = gen_reg_rtx (V4DImode);
43111 ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2);
43112 ops[5] = gen_reg_rtx (V4DImode);
43113 ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2);
43114 op0 = gen_reg_rtx (V8DImode);
43115 ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2);
43116 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
43120 half_mode = V16QImode;
43124 half_mode = V8HImode;
43128 n = GET_MODE_NUNITS (mode);
43129 for (i = 0; i < n; i++)
43130 ops[i] = XVECEXP (vals, 0, i);
43131 op0 = gen_reg_rtx (half_mode);
43132 op1 = gen_reg_rtx (half_mode);
43133 ix86_expand_vector_init_interleave (half_mode, op0, ops,
43135 ix86_expand_vector_init_interleave (half_mode, op1,
43136 &ops [n >> 1], n >> 2);
43137 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
43141 quarter_mode = V16QImode;
43142 half_mode = V32QImode;
43146 quarter_mode = V8HImode;
43147 half_mode = V16HImode;
43151 n = GET_MODE_NUNITS (mode);
43152 for (i = 0; i < n; i++)
43153 ops[i] = XVECEXP (vals, 0, i);
43154 op0 = gen_reg_rtx (quarter_mode);
43155 op1 = gen_reg_rtx (quarter_mode);
43156 op2 = gen_reg_rtx (quarter_mode);
43157 op3 = gen_reg_rtx (quarter_mode);
43158 op4 = gen_reg_rtx (half_mode);
43159 op5 = gen_reg_rtx (half_mode);
43160 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
43162 ix86_expand_vector_init_interleave (quarter_mode, op1,
43163 &ops [n >> 2], n >> 3);
43164 ix86_expand_vector_init_interleave (quarter_mode, op2,
43165 &ops [n >> 1], n >> 3);
43166 ix86_expand_vector_init_interleave (quarter_mode, op3,
43167 &ops [(n >> 1) | (n >> 2)], n >> 3);
43168 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
43169 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
43170 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
43174 if (!TARGET_SSE4_1)
43182 /* Don't use ix86_expand_vector_init_interleave if we can't
43183 move from GPR to SSE register directly. */
43184 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
43187 n = GET_MODE_NUNITS (mode);
43188 for (i = 0; i < n; i++)
43189 ops[i] = XVECEXP (vals, 0, i);
43190 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
43198 gcc_unreachable ();
43202 int i, j, n_elts, n_words, n_elt_per_word;
43203 machine_mode inner_mode;
43204 rtx words[4], shift;
43206 inner_mode = GET_MODE_INNER (mode);
43207 n_elts = GET_MODE_NUNITS (mode);
43208 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
43209 n_elt_per_word = n_elts / n_words;
43210 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
43212 for (i = 0; i < n_words; ++i)
43214 rtx word = NULL_RTX;
43216 for (j = 0; j < n_elt_per_word; ++j)
43218 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
43219 elt = convert_modes (word_mode, inner_mode, elt, true);
43225 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
43226 word, 1, OPTAB_LIB_WIDEN);
43227 word = expand_simple_binop (word_mode, IOR, word, elt,
43228 word, 1, OPTAB_LIB_WIDEN);
43236 emit_move_insn (target, gen_lowpart (mode, words[0]));
43237 else if (n_words == 2)
43239 rtx tmp = gen_reg_rtx (mode);
43240 emit_clobber (tmp);
43241 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
43242 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
43243 emit_move_insn (target, tmp);
43245 else if (n_words == 4)
43247 rtx tmp = gen_reg_rtx (V4SImode);
43248 gcc_assert (word_mode == SImode);
43249 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
43250 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
43251 emit_move_insn (target, gen_lowpart (mode, tmp));
43254 gcc_unreachable ();
43258 /* Initialize vector TARGET via VALS. Suppress the use of MMX
43259 instructions unless MMX_OK is true. */
43262 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
43264 machine_mode mode = GET_MODE (target);
43265 machine_mode inner_mode = GET_MODE_INNER (mode);
43266 int n_elts = GET_MODE_NUNITS (mode);
43267 int n_var = 0, one_var = -1;
43268 bool all_same = true, all_const_zero = true;
43272 /* Handle first initialization from vector elts. */
43273 if (n_elts != XVECLEN (vals, 0))
43275 rtx subtarget = target;
43276 x = XVECEXP (vals, 0, 0);
43277 gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode);
43278 if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts)
43280 rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
43281 if (inner_mode == QImode || inner_mode == HImode)
43283 unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode);
43284 mode = mode_for_vector (SImode, n_bits / 4).require ();
43285 inner_mode = mode_for_vector (SImode, n_bits / 8).require ();
43286 ops[0] = gen_lowpart (inner_mode, ops[0]);
43287 ops[1] = gen_lowpart (inner_mode, ops[1]);
43288 subtarget = gen_reg_rtx (mode);
43290 ix86_expand_vector_init_concat (mode, subtarget, ops, 2);
43291 if (subtarget != target)
43292 emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget));
43295 gcc_unreachable ();
43298 for (i = 0; i < n_elts; ++i)
43300 x = XVECEXP (vals, 0, i);
43301 if (!(CONST_SCALAR_INT_P (x)
43302 || CONST_DOUBLE_P (x)
43303 || CONST_FIXED_P (x)))
43304 n_var++, one_var = i;
43305 else if (x != CONST0_RTX (inner_mode))
43306 all_const_zero = false;
43307 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
43311 /* Constants are best loaded from the constant pool. */
43314 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
43318 /* If all values are identical, broadcast the value. */
43320 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
43321 XVECEXP (vals, 0, 0)))
43324 /* Values where only one field is non-constant are best loaded from
43325 the pool and overwritten via move later. */
43329 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
43330 XVECEXP (vals, 0, one_var),
43334 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
43338 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
43342 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
43344 machine_mode mode = GET_MODE (target);
43345 machine_mode inner_mode = GET_MODE_INNER (mode);
43346 machine_mode half_mode;
43347 bool use_vec_merge = false;
43349 static rtx (*gen_extract[6][2]) (rtx, rtx)
43351 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
43352 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
43353 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
43354 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
43355 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
43356 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
43358 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
43360 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
43361 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
43362 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
43363 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
43364 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
43365 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
43368 machine_mode mmode = VOIDmode;
43369 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
43377 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
43378 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
43380 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
43382 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
43383 emit_insn (gen_rtx_SET (target, tmp));
43389 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
43393 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
43394 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
43396 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
43398 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
43399 emit_insn (gen_rtx_SET (target, tmp));
43406 /* For the two element vectors, we implement a VEC_CONCAT with
43407 the extraction of the other element. */
43409 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
43410 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
43413 op0 = val, op1 = tmp;
43415 op0 = tmp, op1 = val;
43417 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
43418 emit_insn (gen_rtx_SET (target, tmp));
43423 use_vec_merge = TARGET_SSE4_1;
43430 use_vec_merge = true;
43434 /* tmp = target = A B C D */
43435 tmp = copy_to_reg (target);
43436 /* target = A A B B */
43437 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
43438 /* target = X A B B */
43439 ix86_expand_vector_set (false, target, val, 0);
43440 /* target = A X C D */
43441 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43442 const1_rtx, const0_rtx,
43443 GEN_INT (2+4), GEN_INT (3+4)));
43447 /* tmp = target = A B C D */
43448 tmp = copy_to_reg (target);
43449 /* tmp = X B C D */
43450 ix86_expand_vector_set (false, tmp, val, 0);
43451 /* target = A B X D */
43452 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43453 const0_rtx, const1_rtx,
43454 GEN_INT (0+4), GEN_INT (3+4)));
43458 /* tmp = target = A B C D */
43459 tmp = copy_to_reg (target);
43460 /* tmp = X B C D */
43461 ix86_expand_vector_set (false, tmp, val, 0);
43462 /* target = A B X D */
43463 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43464 const0_rtx, const1_rtx,
43465 GEN_INT (2+4), GEN_INT (0+4)));
43469 gcc_unreachable ();
43474 use_vec_merge = TARGET_SSE4_1;
43478 /* Element 0 handled by vec_merge below. */
43481 use_vec_merge = true;
43487 /* With SSE2, use integer shuffles to swap element 0 and ELT,
43488 store into element 0, then shuffle them back. */
43492 order[0] = GEN_INT (elt);
43493 order[1] = const1_rtx;
43494 order[2] = const2_rtx;
43495 order[3] = GEN_INT (3);
43496 order[elt] = const0_rtx;
43498 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
43499 order[1], order[2], order[3]));
43501 ix86_expand_vector_set (false, target, val, 0);
43503 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
43504 order[1], order[2], order[3]));
43508 /* For SSE1, we have to reuse the V4SF code. */
43509 rtx t = gen_reg_rtx (V4SFmode);
43510 emit_move_insn (t, gen_lowpart (V4SFmode, target));
43511 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
43512 emit_move_insn (target, gen_lowpart (mode, t));
43517 use_vec_merge = TARGET_SSE2;
43520 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
43524 use_vec_merge = TARGET_SSE4_1;
43531 half_mode = V16QImode;
43537 half_mode = V8HImode;
43543 half_mode = V4SImode;
43549 half_mode = V2DImode;
43555 half_mode = V4SFmode;
43561 half_mode = V2DFmode;
43567 /* Compute offset. */
43571 gcc_assert (i <= 1);
43573 /* Extract the half. */
43574 tmp = gen_reg_rtx (half_mode);
43575 emit_insn (gen_extract[j][i] (tmp, target));
43577 /* Put val in tmp at elt. */
43578 ix86_expand_vector_set (false, tmp, val, elt);
43581 emit_insn (gen_insert[j][i] (target, target, tmp));
43585 if (TARGET_AVX512F)
43588 gen_blendm = gen_avx512f_blendmv8df;
43593 if (TARGET_AVX512F)
43596 gen_blendm = gen_avx512f_blendmv8di;
43601 if (TARGET_AVX512F)
43604 gen_blendm = gen_avx512f_blendmv16sf;
43609 if (TARGET_AVX512F)
43612 gen_blendm = gen_avx512f_blendmv16si;
43617 if (TARGET_AVX512BW)
43620 gen_blendm = gen_avx512bw_blendmv32hi;
43622 else if (TARGET_AVX512F)
43624 half_mode = E_V8HImode;
43631 if (TARGET_AVX512BW)
43634 gen_blendm = gen_avx512bw_blendmv64qi;
43636 else if (TARGET_AVX512F)
43638 half_mode = E_V16QImode;
43645 /* Compute offset. */
43649 gcc_assert (i <= 3);
43652 /* Extract the quarter. */
43653 tmp = gen_reg_rtx (V4SImode);
43654 rtx tmp2 = gen_lowpart (V16SImode, target);
43655 rtx mask = gen_reg_rtx (QImode);
43657 emit_move_insn (mask, constm1_rtx);
43658 emit_insn (gen_avx512f_vextracti32x4_mask (tmp, tmp2, GEN_INT (i),
43661 tmp2 = gen_reg_rtx (half_mode);
43662 emit_move_insn (tmp2, gen_lowpart (half_mode, tmp));
43665 /* Put val in tmp at elt. */
43666 ix86_expand_vector_set (false, tmp, val, elt);
43669 tmp2 = gen_reg_rtx (V16SImode);
43670 rtx tmp3 = gen_lowpart (V16SImode, target);
43671 mask = gen_reg_rtx (HImode);
43672 emit_move_insn (mask, constm1_rtx);
43673 tmp = gen_lowpart (V4SImode, tmp);
43674 emit_insn (gen_avx512f_vinserti32x4_mask (tmp2, tmp3, tmp, GEN_INT (i),
43676 emit_move_insn (target, gen_lowpart (mode, tmp2));
43684 if (mmode != VOIDmode)
43686 tmp = gen_reg_rtx (mode);
43687 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
43688 /* The avx512*_blendm<mode> expanders have different operand order
43689 from VEC_MERGE. In VEC_MERGE, the first input operand is used for
43690 elements where the mask is set and second input operand otherwise,
43691 in {sse,avx}*_*blend* the first input operand is used for elements
43692 where the mask is clear and second input operand otherwise. */
43693 emit_insn (gen_blendm (target, target, tmp,
43695 gen_int_mode (HOST_WIDE_INT_1U << elt,
43698 else if (use_vec_merge)
43700 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
43701 tmp = gen_rtx_VEC_MERGE (mode, tmp, target,
43702 GEN_INT (HOST_WIDE_INT_1U << elt));
43703 emit_insn (gen_rtx_SET (target, tmp));
43707 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
43709 emit_move_insn (mem, target);
43711 tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
43712 emit_move_insn (tmp, val);
43714 emit_move_insn (target, mem);
43719 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
43721 machine_mode mode = GET_MODE (vec);
43722 machine_mode inner_mode = GET_MODE_INNER (mode);
43723 bool use_vec_extr = false;
43738 use_vec_extr = true;
43742 use_vec_extr = TARGET_SSE4_1;
43754 tmp = gen_reg_rtx (mode);
43755 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
43756 GEN_INT (elt), GEN_INT (elt),
43757 GEN_INT (elt+4), GEN_INT (elt+4)));
43761 tmp = gen_reg_rtx (mode);
43762 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
43766 gcc_unreachable ();
43769 use_vec_extr = true;
43774 use_vec_extr = TARGET_SSE4_1;
43788 tmp = gen_reg_rtx (mode);
43789 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
43790 GEN_INT (elt), GEN_INT (elt),
43791 GEN_INT (elt), GEN_INT (elt)));
43795 tmp = gen_reg_rtx (mode);
43796 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
43800 gcc_unreachable ();
43803 use_vec_extr = true;
43808 /* For SSE1, we have to reuse the V4SF code. */
43809 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
43810 gen_lowpart (V4SFmode, vec), elt);
43816 use_vec_extr = TARGET_SSE2;
43819 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
43823 use_vec_extr = TARGET_SSE4_1;
43829 tmp = gen_reg_rtx (V4SFmode);
43831 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
43833 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
43834 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43842 tmp = gen_reg_rtx (V2DFmode);
43844 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
43846 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
43847 ix86_expand_vector_extract (false, target, tmp, elt & 1);
43855 tmp = gen_reg_rtx (V16QImode);
43857 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
43859 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
43860 ix86_expand_vector_extract (false, target, tmp, elt & 15);
43868 tmp = gen_reg_rtx (V8HImode);
43870 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
43872 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
43873 ix86_expand_vector_extract (false, target, tmp, elt & 7);
43881 tmp = gen_reg_rtx (V4SImode);
43883 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
43885 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
43886 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43894 tmp = gen_reg_rtx (V2DImode);
43896 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
43898 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
43899 ix86_expand_vector_extract (false, target, tmp, elt & 1);
43905 if (TARGET_AVX512BW)
43907 tmp = gen_reg_rtx (V16HImode);
43909 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
43911 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
43912 ix86_expand_vector_extract (false, target, tmp, elt & 15);
43918 if (TARGET_AVX512BW)
43920 tmp = gen_reg_rtx (V32QImode);
43922 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
43924 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
43925 ix86_expand_vector_extract (false, target, tmp, elt & 31);
43931 tmp = gen_reg_rtx (V8SFmode);
43933 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
43935 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
43936 ix86_expand_vector_extract (false, target, tmp, elt & 7);
43940 tmp = gen_reg_rtx (V4DFmode);
43942 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
43944 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
43945 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43949 tmp = gen_reg_rtx (V8SImode);
43951 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
43953 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
43954 ix86_expand_vector_extract (false, target, tmp, elt & 7);
43958 tmp = gen_reg_rtx (V4DImode);
43960 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
43962 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
43963 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43967 /* ??? Could extract the appropriate HImode element and shift. */
43974 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
43975 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
43977 /* Let the rtl optimizers know about the zero extension performed. */
43978 if (inner_mode == QImode || inner_mode == HImode)
43980 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
43981 target = gen_lowpart (SImode, target);
43984 emit_insn (gen_rtx_SET (target, tmp));
43988 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
43990 emit_move_insn (mem, vec);
43992 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
43993 emit_move_insn (target, tmp);
43997 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
43998 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
43999 The upper bits of DEST are undefined, though they shouldn't cause
44000 exceptions (some bits from src or all zeros are ok). */
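/* E.g. for a V4SFmode SRC, i == 128 moves elements 2 and 3 down into
   positions 0 and 1 (movhlps), while i == 64 replicates element 1 into
   position 0 (shufps).  */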
44003 emit_reduc_half (rtx dest, rtx src, int i)
44006 switch (GET_MODE (src))
44010 tem = gen_sse_movhlps (dest, src, src);
44012 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
44013 GEN_INT (1 + 4), GEN_INT (1 + 4));
44016 tem = gen_vec_interleave_highv2df (dest, src, src);
44022 d = gen_reg_rtx (V1TImode);
44023 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
44028 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
44030 tem = gen_avx_shufps256 (dest, src, src,
44031 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
44035 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
44037 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
44045 if (GET_MODE (dest) != V4DImode)
44046 d = gen_reg_rtx (V4DImode);
44047 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
44048 gen_lowpart (V4DImode, src),
44053 d = gen_reg_rtx (V2TImode);
44054 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
44065 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
44066 gen_lowpart (V16SImode, src),
44067 gen_lowpart (V16SImode, src),
44068 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
44069 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
44070 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
44071 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
44072 GEN_INT (0xC), GEN_INT (0xD),
44073 GEN_INT (0xE), GEN_INT (0xF),
44074 GEN_INT (0x10), GEN_INT (0x11),
44075 GEN_INT (0x12), GEN_INT (0x13),
44076 GEN_INT (0x14), GEN_INT (0x15),
44077 GEN_INT (0x16), GEN_INT (0x17));
44079 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
44080 gen_lowpart (V16SImode, src),
44081 GEN_INT (i == 128 ? 0x2 : 0x1),
44085 GEN_INT (i == 128 ? 0x6 : 0x5),
44089 GEN_INT (i == 128 ? 0xA : 0x9),
44093 GEN_INT (i == 128 ? 0xE : 0xD),
44099 gcc_unreachable ();
44103 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
44106 /* Expand a vector reduction. FN is the binary pattern to reduce;
44107 DEST is the destination; IN is the input vector. */
44110 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
44112 rtx half, dst, vec = in;
44113 machine_mode mode = GET_MODE (in);
44116 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
44118 && mode == V8HImode
44119 && fn == gen_uminv8hi3)
44121 emit_insn (gen_sse4_1_phminposuw (dest, in));
44125 for (i = GET_MODE_BITSIZE (mode);
44126 i > GET_MODE_UNIT_BITSIZE (mode);
44129 half = gen_reg_rtx (mode);
44130 emit_reduc_half (half, vec, i);
44131 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
44134 dst = gen_reg_rtx (mode);
44135 emit_insn (fn (dst, half, vec));
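/* For illustration only -- a scalar sketch of the halving reduction
   emitted above, shown for a hypothetical 8-element MAX reduction
   (emit_reduc_half supplies the upper-half operand at each step):

     int v[8];
     for (int width = 8; width > 1; width /= 2)
       for (int j = 0; j < width / 2; j++)
	 v[j] = MAX (v[j], v[j + width / 2]);

   after which v[0] holds the reduction result.  */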
44140 /* Target hook for scalar_mode_supported_p. */
44142 ix86_scalar_mode_supported_p (scalar_mode mode)
44144 if (DECIMAL_FLOAT_MODE_P (mode))
44145 return default_decimal_float_supported_p ();
44146 else if (mode == TFmode)
44149 return default_scalar_mode_supported_p (mode);
44152 /* Implements target hook vector_mode_supported_p. */
44154 ix86_vector_mode_supported_p (machine_mode mode)
44156 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
44158 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
44160 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
44162 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
44164 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
44166 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
44171 /* Target hook for c_mode_for_suffix. */
44172 static machine_mode
44173 ix86_c_mode_for_suffix (char suffix)
44183 /* Worker function for TARGET_MD_ASM_ADJUST.
44185 We implement asm flag outputs, and maintain source compatibility
44186 with the old cc0-based compiler. */
44189 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
44190 vec<const char *> &constraints,
44191 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
44193 bool saw_asm_flag = false;
44196 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
44198 const char *con = constraints[i];
44199 if (strncmp (con, "=@cc", 4) != 0)
44202 if (strchr (con, ',') != NULL)
44204 error ("alternatives not allowed in asm flag output");
44208 bool invert = false;
44210 invert = true, con++;
44212 machine_mode mode = CCmode;
44213 rtx_code code = UNKNOWN;
44219 mode = CCAmode, code = EQ;
44220 else if (con[1] == 'e' && con[2] == 0)
44221 mode = CCCmode, code = NE;
44225 mode = CCCmode, code = EQ;
44226 else if (con[1] == 'e' && con[2] == 0)
44227 mode = CCAmode, code = NE;
44231 mode = CCCmode, code = EQ;
44235 mode = CCZmode, code = EQ;
44239 mode = CCGCmode, code = GT;
44240 else if (con[1] == 'e' && con[2] == 0)
44241 mode = CCGCmode, code = GE;
44245 mode = CCGCmode, code = LT;
44246 else if (con[1] == 'e' && con[2] == 0)
44247 mode = CCGCmode, code = LE;
44251 mode = CCOmode, code = EQ;
44255 mode = CCPmode, code = EQ;
44259 mode = CCSmode, code = EQ;
44263 mode = CCZmode, code = EQ;
44266 if (code == UNKNOWN)
44268 error ("unknown asm flag output %qs", constraints[i]);
44272 code = reverse_condition (code);
44274 rtx dest = outputs[i];
44277 /* This is the first asm flag output. Here we put the flags
44278 register in as the real output and adjust the condition to
44280 constraints[i] = "=Bf";
44281 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
44282 saw_asm_flag = true;
44286 /* We don't need the flags register as output twice. */
44287 constraints[i] = "=X";
44288 outputs[i] = gen_rtx_SCRATCH (SImode);
44291 rtx x = gen_rtx_REG (mode, FLAGS_REG);
44292 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
44294 machine_mode dest_mode = GET_MODE (dest);
44295 if (!SCALAR_INT_MODE_P (dest_mode))
44297 error ("invalid type for asm flag output");
44301 if (dest_mode == DImode && !TARGET_64BIT)
44302 dest_mode = SImode;
44304 if (dest_mode != QImode)
44306 rtx destqi = gen_reg_rtx (QImode);
44307 emit_insn (gen_rtx_SET (destqi, x));
44309 if (TARGET_ZERO_EXTEND_WITH_AND
44310 && optimize_function_for_speed_p (cfun))
44312 x = force_reg (dest_mode, const0_rtx);
44314 emit_insn (gen_movstrictqi
44315 (gen_lowpart (QImode, x), destqi));
44318 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
44321 if (dest_mode != GET_MODE (dest))
44323 rtx tmp = gen_reg_rtx (SImode);
44325 emit_insn (gen_rtx_SET (tmp, x));
44326 emit_insn (gen_zero_extendsidi2 (dest, tmp));
44329 emit_insn (gen_rtx_SET (dest, x));
44331 rtx_insn *seq = get_insns ();
44338 /* If we had no asm flag outputs, clobber the flags. */
44339 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
44340 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
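/* Usage illustration (a hedged sketch, not part of the compiler): the
   flag-output constraints handled above let user code read a condition
   code directly, e.g.

     int carry;
     asm ("addl %2, %1" : "=@ccc" (carry), "+r" (dst) : "r" (src));

   binds CARRY to the state of the carry flag after the asm; DST and
   SRC are hypothetical variables.  */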
/* Implements the target hook targetm.encode_section_info.  */
44347 static void ATTRIBUTE_UNUSED
44348 ix86_encode_section_info (tree decl, rtx rtl, int first)
44350 default_encode_section_info (decl, rtl, first);
44352 if (ix86_in_large_data_p (decl))
44353 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
44356 /* Worker function for REVERSE_CONDITION. */
44359 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
44361 return (mode == CCFPmode
44362 ? reverse_condition_maybe_unordered (code)
44363 : reverse_condition (code));
44366 /* Output code to perform an x87 FP register move, from OPERANDS[1]
44370 output_387_reg_move (rtx_insn *insn, rtx *operands)
44372 if (REG_P (operands[0]))
44374 if (REG_P (operands[1])
44375 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
44377 if (REGNO (operands[0]) == FIRST_STACK_REG)
44378 return output_387_ffreep (operands, 0);
44379 return "fstp\t%y0";
44381 if (STACK_TOP_P (operands[0]))
44382 return "fld%Z1\t%y1";
44385 else if (MEM_P (operands[0]))
44387 gcc_assert (REG_P (operands[1]));
44388 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
44389 return "fstp%Z0\t%y0";
44392 /* There is no non-popping store to memory for XFmode.
44393 So if we need one, follow the store with a load. */
44394 if (GET_MODE (operands[0]) == XFmode)
44395 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
44397 return "fst%Z0\t%y0";
44404 /* Output code to perform a conditional jump to LABEL, if C2 flag in
44405 FP status register is set. */
44408 ix86_emit_fp_unordered_jump (rtx label)
44410 rtx reg = gen_reg_rtx (HImode);
44414 emit_insn (gen_x86_fnstsw_1 (reg));
44416 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
44418 emit_insn (gen_x86_sahf_1 (reg));
44420 temp = gen_rtx_REG (CCmode, FLAGS_REG);
44421 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
44425 emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04)));
44427 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
44428 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
44431 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
44432 gen_rtx_LABEL_REF (VOIDmode, label),
44434 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, temp));
44435 predict_jump (REG_BR_PROB_BASE * 10 / 100);
44436 JUMP_LABEL (insn) = label;
/* Output code to perform a sinh XFmode calculation.  */
44441 void ix86_emit_i387_sinh (rtx op0, rtx op1)
44443 rtx e1 = gen_reg_rtx (XFmode);
44444 rtx e2 = gen_reg_rtx (XFmode);
44445 rtx scratch = gen_reg_rtx (HImode);
44446 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
44447 rtx half = const_double_from_real_value (dconsthalf, XFmode);
44449 rtx_code_label *jump_label = gen_label_rtx ();
44452 /* scratch = fxam (op1) */
44453 emit_insn (gen_fxamxf2_i387 (scratch, op1));
44455 /* e1 = expm1 (|op1|) */
44456 emit_insn (gen_absxf2 (e2, op1));
44457 emit_insn (gen_expm1xf2 (e1, e2));
44459 /* e2 = e1 / (e1 + 1.0) + e1 */
44460 cst1 = force_reg (XFmode, CONST1_RTX (XFmode));
44461 emit_insn (gen_addxf3 (e2, e1, cst1));
44462 emit_insn (gen_divxf3 (e2, e1, e2));
44463 emit_insn (gen_addxf3 (e2, e2, e1));
44465 /* flags = signbit (op1) */
44466 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
44468 /* if (flags) then e2 = -e2 */
44469 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
44470 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
44471 gen_rtx_LABEL_REF (VOIDmode, jump_label),
44473 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
44474 predict_jump (REG_BR_PROB_BASE * 50 / 100);
44475 JUMP_LABEL (insn) = jump_label;
44477 emit_insn (gen_negxf2 (e2, e2));
44479 emit_label (jump_label);
44480 LABEL_NUSES (jump_label) = 1;
44482 /* op0 = 0.5 * e2 */
44483 half = force_reg (XFmode, half);
44484 emit_insn (gen_mulxf3 (op0, e2, half));
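/* Illustration only -- the scalar identity realized above, written
   against a hypothetical double X:

     e1 = expm1 (fabs (x));	      e^|x| - 1, accurate near zero
     e2 = e1 / (e1 + 1.0) + e1;	      == e^|x| - e^-|x| == 2*sinh(|x|)
     sinh (x) == copysign (0.5 * e2, x);

   Using expm1 avoids catastrophic cancellation for small |x|.  */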
/* Output code to perform a cosh XFmode calculation.  */
44489 void ix86_emit_i387_cosh (rtx op0, rtx op1)
44491 rtx e1 = gen_reg_rtx (XFmode);
44492 rtx e2 = gen_reg_rtx (XFmode);
44493 rtx half = const_double_from_real_value (dconsthalf, XFmode);
44496 /* e1 = exp (op1) */
44497 emit_insn (gen_expxf2 (e1, op1));
44499 /* e2 = e1 + 1.0 / e1 */
44500 cst1 = force_reg (XFmode, CONST1_RTX (XFmode));
44501 emit_insn (gen_divxf3 (e2, cst1, e1));
44502 emit_insn (gen_addxf3 (e2, e1, e2));
44504 /* op0 = 0.5 * e2 */
44505 half = force_reg (XFmode, half);
44506 emit_insn (gen_mulxf3 (op0, e2, half));
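/* Illustration only -- the scalar identity realized above:

     e1 = exp (x);
     cosh (x) == 0.5 * (e1 + 1.0 / e1);

   No sign handling is needed because cosh is an even function.  */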
/* Output code to perform a tanh XFmode calculation.  */
44511 void ix86_emit_i387_tanh (rtx op0, rtx op1)
44513 rtx e1 = gen_reg_rtx (XFmode);
44514 rtx e2 = gen_reg_rtx (XFmode);
44515 rtx scratch = gen_reg_rtx (HImode);
44516 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
44518 rtx_code_label *jump_label = gen_label_rtx ();
44521 /* scratch = fxam (op1) */
44522 emit_insn (gen_fxamxf2_i387 (scratch, op1));
44524 /* e1 = expm1 (-|2 * op1|) */
44525 emit_insn (gen_addxf3 (e2, op1, op1));
44526 emit_insn (gen_absxf2 (e2, e2));
44527 emit_insn (gen_negxf2 (e2, e2));
44528 emit_insn (gen_expm1xf2 (e1, e2));
44530 /* e2 = e1 / (e1 + 2.0) */
44531 cst2 = force_reg (XFmode, CONST2_RTX (XFmode));
44532 emit_insn (gen_addxf3 (e2, e1, cst2));
44533 emit_insn (gen_divxf3 (e2, e1, e2));
44535 /* flags = signbit (op1) */
44536 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
44538 /* if (!flags) then e2 = -e2 */
44539 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
44540 gen_rtx_NE (VOIDmode, flags, const0_rtx),
44541 gen_rtx_LABEL_REF (VOIDmode, jump_label),
44543 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
44544 predict_jump (REG_BR_PROB_BASE * 50 / 100);
44545 JUMP_LABEL (insn) = jump_label;
44547 emit_insn (gen_negxf2 (e2, e2));
44549 emit_label (jump_label);
44550 LABEL_NUSES (jump_label) = 1;
44552 emit_move_insn (op0, e2);
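/* Illustration only -- the scalar identity realized above:

     e1 = expm1 (-fabs (x + x));      e^-2|x| - 1
     e2 = e1 / (e1 + 2.0);	      == -tanh (|x|)
     tanh (x) == copysign (-e2, x);

   i.e. tanh (|x|) = (1 - e^-2|x|) / (1 + e^-2|x|).  */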
44555 /* Output code to perform an asinh XFmode calculation. */
44557 void ix86_emit_i387_asinh (rtx op0, rtx op1)
44559 rtx e1 = gen_reg_rtx (XFmode);
44560 rtx e2 = gen_reg_rtx (XFmode);
44561 rtx scratch = gen_reg_rtx (HImode);
44562 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
44564 rtx_code_label *jump_label = gen_label_rtx ();
44567 /* e2 = sqrt (op1^2 + 1.0) + 1.0 */
44568 emit_insn (gen_mulxf3 (e1, op1, op1));
44569 cst1 = force_reg (XFmode, CONST1_RTX (XFmode));
44570 emit_insn (gen_addxf3 (e2, e1, cst1));
44571 emit_insn (gen_sqrtxf2 (e2, e2));
44572 emit_insn (gen_addxf3 (e2, e2, cst1));
44575 emit_insn (gen_divxf3 (e1, e1, e2));
44577 /* scratch = fxam (op1) */
44578 emit_insn (gen_fxamxf2_i387 (scratch, op1));
44580 /* e1 = e1 + |op1| */
44581 emit_insn (gen_absxf2 (e2, op1));
44582 emit_insn (gen_addxf3 (e1, e1, e2));
44584 /* e2 = log1p (e1) */
44585 ix86_emit_i387_log1p (e2, e1);
44587 /* flags = signbit (op1) */
44588 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
44590 /* if (flags) then e2 = -e2 */
44591 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
44592 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
44593 gen_rtx_LABEL_REF (VOIDmode, jump_label),
44595 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
44596 predict_jump (REG_BR_PROB_BASE * 50 / 100);
44597 JUMP_LABEL (insn) = jump_label;
44599 emit_insn (gen_negxf2 (e2, e2));
44601 emit_label (jump_label);
44602 LABEL_NUSES (jump_label) = 1;
44604 emit_move_insn (op0, e2);
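/* Illustration only -- the scalar identity realized above:

     t = x * x;
     e = t / (sqrt (t + 1.0) + 1.0);  == sqrt (x*x + 1) - 1
     asinh (x) == copysign (log1p (e + fabs (x)), x);

   log1p keeps the result accurate when |x| is small.  */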
44607 /* Output code to perform an acosh XFmode calculation. */
44609 void ix86_emit_i387_acosh (rtx op0, rtx op1)
44611 rtx e1 = gen_reg_rtx (XFmode);
44612 rtx e2 = gen_reg_rtx (XFmode);
44613 rtx cst1 = force_reg (XFmode, CONST1_RTX (XFmode));
44615 /* e2 = sqrt (op1 + 1.0) */
44616 emit_insn (gen_addxf3 (e2, op1, cst1));
44617 emit_insn (gen_sqrtxf2 (e2, e2));
44619 /* e1 = sqrt (op1 - 1.0) */
44620 emit_insn (gen_subxf3 (e1, op1, cst1));
44621 emit_insn (gen_sqrtxf2 (e1, e1));
44624 emit_insn (gen_mulxf3 (e1, e1, e2));
44626 /* e1 = e1 + op1 */
44627 emit_insn (gen_addxf3 (e1, e1, op1));
44629 /* op0 = log (e1) */
44630 emit_insn (gen_logxf2 (op0, e1));
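/* Illustration only -- the scalar identity realized above, valid for
   x >= 1.0:

     acosh (x) == log (x + sqrt (x + 1.0) * sqrt (x - 1.0));

   sqrt (x+1) * sqrt (x-1) is used instead of sqrt (x*x - 1) so that
   x*x cannot overflow for large x.  */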
44633 /* Output code to perform an atanh XFmode calculation. */
44635 void ix86_emit_i387_atanh (rtx op0, rtx op1)
44637 rtx e1 = gen_reg_rtx (XFmode);
44638 rtx e2 = gen_reg_rtx (XFmode);
44639 rtx scratch = gen_reg_rtx (HImode);
44640 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
44641 rtx half = const_double_from_real_value (dconsthalf, XFmode);
44643 rtx_code_label *jump_label = gen_label_rtx ();
44646 /* scratch = fxam (op1) */
44647 emit_insn (gen_fxamxf2_i387 (scratch, op1));
44650 emit_insn (gen_absxf2 (e2, op1));
44652 /* e1 = -(e2 + e2) / (e2 + 1.0) */
44653 cst1 = force_reg (XFmode, CONST1_RTX (XFmode));
44654 emit_insn (gen_addxf3 (e1, e2, cst1));
44655 emit_insn (gen_addxf3 (e2, e2, e2));
44656 emit_insn (gen_negxf2 (e2, e2));
44657 emit_insn (gen_divxf3 (e1, e2, e1));
44659 /* e2 = log1p (e1) */
44660 ix86_emit_i387_log1p (e2, e1);
44662 /* flags = signbit (op1) */
44663 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
44665 /* if (!flags) then e2 = -e2 */
44666 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
44667 gen_rtx_NE (VOIDmode, flags, const0_rtx),
44668 gen_rtx_LABEL_REF (VOIDmode, jump_label),
44670 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
44671 predict_jump (REG_BR_PROB_BASE * 50 / 100);
44672 JUMP_LABEL (insn) = jump_label;
44674 emit_insn (gen_negxf2 (e2, e2));
44676 emit_label (jump_label);
44677 LABEL_NUSES (jump_label) = 1;
44679 /* op0 = 0.5 * e2 */
44680 half = force_reg (XFmode, half);
44681 emit_insn (gen_mulxf3 (op0, e2, half));
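/* Illustration only -- the scalar identity realized above:

     e = -(fabs (x) + fabs (x)) / (fabs (x) + 1.0);
     log1p (e) == log ((1 - |x|) / (1 + |x|)) == -2 * atanh (|x|)
     atanh (x) == copysign (-0.5 * log1p (e), x);  */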
44684 /* Output code to perform a log1p XFmode calculation. */
44686 void ix86_emit_i387_log1p (rtx op0, rtx op1)
44688 rtx_code_label *label1 = gen_label_rtx ();
44689 rtx_code_label *label2 = gen_label_rtx ();
44691 rtx tmp = gen_reg_rtx (XFmode);
44692 rtx res = gen_reg_rtx (XFmode);
44693 rtx cst, cstln2, cst1;
44696 cst = const_double_from_real_value
44697 (REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), XFmode);
44698 cstln2 = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */
44700 emit_insn (gen_absxf2 (tmp, op1));
44702 cst = force_reg (XFmode, cst);
44703 ix86_expand_branch (GE, tmp, cst, label1);
44704 predict_jump (REG_BR_PROB_BASE * 10 / 100);
44705 insn = get_last_insn ();
44706 JUMP_LABEL (insn) = label1;
44708 emit_insn (gen_fyl2xp1xf3_i387 (res, op1, cstln2));
44709 emit_jump (label2);
44711 emit_label (label1);
44712 LABEL_NUSES (label1) = 1;
44714 cst1 = force_reg (XFmode, CONST1_RTX (XFmode));
44715 emit_insn (gen_rtx_SET (tmp, gen_rtx_PLUS (XFmode, op1, cst1)));
44716 emit_insn (gen_fyl2xxf3_i387 (res, tmp, cstln2));
44718 emit_label (label2);
44719 LABEL_NUSES (label2) = 1;
44721 emit_move_insn (op0, res);
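/* Illustration only -- a scalar sketch of the dispatch above, where
   FYL2XP1 (x, y) stands for the i387 operation y * log2 (x + 1) and
   FYL2X (x, y) for y * log2 (x):

     if (fabs (x) >= 1.0 - sqrt (0.5))	 the 0.29289... constant
       res = FYL2X (x + 1.0, M_LN2);
     else
       res = FYL2XP1 (x, M_LN2);	 accurate only for small |x|,
					 hence the cutoff above.  */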
44724 /* Emit code for round calculation. */
44725 void ix86_emit_i387_round (rtx op0, rtx op1)
44727 machine_mode inmode = GET_MODE (op1);
44728 machine_mode outmode = GET_MODE (op0);
44729 rtx e1 = gen_reg_rtx (XFmode);
44730 rtx e2 = gen_reg_rtx (XFmode);
44731 rtx scratch = gen_reg_rtx (HImode);
44732 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
44733 rtx half = const_double_from_real_value (dconsthalf, XFmode);
44734 rtx res = gen_reg_rtx (outmode);
44735 rtx_code_label *jump_label = gen_label_rtx ();
44736 rtx (*floor_insn) (rtx, rtx);
44737 rtx (*neg_insn) (rtx, rtx);
44745 tmp = gen_reg_rtx (XFmode);
44747 emit_insn (gen_rtx_SET (tmp, gen_rtx_FLOAT_EXTEND (XFmode, op1)));
44753 gcc_unreachable ();
44759 floor_insn = gen_frndintxf2_floor;
44760 neg_insn = gen_negsf2;
44763 floor_insn = gen_frndintxf2_floor;
44764 neg_insn = gen_negdf2;
44767 floor_insn = gen_frndintxf2_floor;
44768 neg_insn = gen_negxf2;
44771 floor_insn = gen_lfloorxfhi2;
44772 neg_insn = gen_neghi2;
44775 floor_insn = gen_lfloorxfsi2;
44776 neg_insn = gen_negsi2;
44779 floor_insn = gen_lfloorxfdi2;
44780 neg_insn = gen_negdi2;
44783 gcc_unreachable ();
44786 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
44788 /* scratch = fxam(op1) */
44789 emit_insn (gen_fxamxf2_i387 (scratch, op1));
44791 /* e1 = fabs(op1) */
44792 emit_insn (gen_absxf2 (e1, op1));
44794 /* e2 = e1 + 0.5 */
44795 half = force_reg (XFmode, half);
44796 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (XFmode, e1, half)));
44798 /* res = floor(e2) */
44804 tmp = gen_reg_rtx (XFmode);
44806 emit_insn (floor_insn (tmp, e2));
44807 emit_insn (gen_rtx_SET (res,
44808 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp),
44809 UNSPEC_TRUNC_NOOP)));
44813 emit_insn (floor_insn (res, e2));
44816 /* flags = signbit(a) */
44817 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
44819 /* if (flags) then res = -res */
44820 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
44821 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
44822 gen_rtx_LABEL_REF (VOIDmode, jump_label),
44824 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
44825 predict_jump (REG_BR_PROB_BASE * 50 / 100);
44826 JUMP_LABEL (insn) = jump_label;
44828 emit_insn (neg_insn (res, res));
44830 emit_label (jump_label);
44831 LABEL_NUSES (jump_label) = 1;
44833 emit_move_insn (op0, res);
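/* Illustration only -- the scalar identity realized above:

     round (x) == copysign (floor (fabs (x) + 0.5), x);

   so halfway cases round away from zero, unlike rint.  */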
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
44839 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
44841 rtx x0, x1, e0, e1;
44843 x0 = gen_reg_rtx (mode);
44844 e0 = gen_reg_rtx (mode);
44845 e1 = gen_reg_rtx (mode);
44846 x1 = gen_reg_rtx (mode);
44848 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
44850 b = force_reg (mode, b);
44852 /* x0 = rcp(b) estimate */
44853 if (mode == V16SFmode || mode == V8DFmode)
44855 if (TARGET_AVX512ER)
44857 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
44860 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0)));
44864 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
44868 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
44872 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
44875 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
44878 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
44881 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
44884 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
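/* The refinement above is one Newton-Raphson iteration for
   f (x) = 1/x - b, arranged to use only mul and add -- a sketch:

     x0 = rcp (b);		      hardware ~12-bit estimate
     x1 = (x0 + x0) - (b * x0 * x0);  == x0 * (2 - b*x0)
     a / b ~= a * x1;

   Each iteration roughly doubles the number of correct bits.  */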
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */
44890 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
44892 rtx x0, e0, e1, e2, e3, mthree, mhalf;
44896 x0 = gen_reg_rtx (mode);
44897 e0 = gen_reg_rtx (mode);
44898 e1 = gen_reg_rtx (mode);
44899 e2 = gen_reg_rtx (mode);
44900 e3 = gen_reg_rtx (mode);
44902 if (TARGET_AVX512ER && mode == V16SFmode)
44905 /* res = rsqrt28(a) estimate */
44906 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
44910 /* x0 = rsqrt28(a) estimate */
44911 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
44913 /* res = rcp28(x0) estimate */
44914 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0),
44920 real_from_integer (&r, VOIDmode, -3, SIGNED);
44921 mthree = const_double_from_real_value (r, SFmode);
44923 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
44924 mhalf = const_double_from_real_value (r, SFmode);
44925 unspec = UNSPEC_RSQRT;
44927 if (VECTOR_MODE_P (mode))
44929 mthree = ix86_build_const_vector (mode, true, mthree);
44930 mhalf = ix86_build_const_vector (mode, true, mhalf);
/* There is no 512-bit rsqrt.  There is, however, rsqrt14.  */
44932 if (GET_MODE_SIZE (mode) == 64)
44933 unspec = UNSPEC_RSQRT14;
44936 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
44937 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
44939 a = force_reg (mode, a);
44941 /* x0 = rsqrt(a) estimate */
44942 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
/* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt (0.0).  */
44948 rtx zero = force_reg (mode, CONST0_RTX(mode));
44951 /* Handle masked compare. */
44952 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
44954 mask = gen_reg_rtx (HImode);
44955 /* Imm value 0x4 corresponds to not-equal comparison. */
44956 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
44957 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
44961 mask = gen_reg_rtx (mode);
44962 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
44963 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
44968 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
44970 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
44973 mthree = force_reg (mode, mthree);
44974 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
44976 mhalf = force_reg (mode, mhalf);
44978 /* e3 = -.5 * x0 */
44979 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
44981 /* e3 = -.5 * e0 */
44982 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
44983 /* ret = e2 * e3 */
44984 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
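/* The sequence above is one Newton-Raphson step for rsqrt,
   x1 = x0 * (3 - a*x0*x0) / 2, with the factors arranged so that the
   final multiply yields either result -- a sketch:

     x0 = rsqrt (a);		      hardware estimate
     e2 = a * x0 * x0 - 3.0;
     rsqrt (a) ~= (-0.5 * x0) * e2;
     sqrt (a)  ~= (-0.5 * (a * x0)) * e2;    since sqrt(a) == a*rsqrt(a)  */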
44987 #ifdef TARGET_SOLARIS
44988 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
44991 i386_solaris_elf_named_section (const char *name, unsigned int flags,
44994 /* With Binutils 2.15, the "@unwind" marker must be specified on
44995 every occurrence of the ".eh_frame" section, not just the first
44998 && strcmp (name, ".eh_frame") == 0)
45000 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45001 flags & SECTION_WRITE ? "aw" : "a");
45006 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45008 solaris_elf_asm_comdat_section (name, flags, decl);
45012 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
45013 SPARC assembler. One cannot mix single-letter flags and #exclude, so
45014 only emit the latter here. */
45015 if (flags & SECTION_EXCLUDE)
45017 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
45022 default_elf_asm_named_section (name, flags, decl);
45024 #endif /* TARGET_SOLARIS */
45026 /* Return the mangling of TYPE if it is an extended fundamental type. */
45028 static const char *
45029 ix86_mangle_type (const_tree type)
45031 type = TYPE_MAIN_VARIANT (type);
45033 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45034 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45037 switch (TYPE_MODE (type))
45040 /* __float128 is "g". */
45043 /* "long double" or __float80 is "e". */
45050 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
45053 ix86_stack_protect_guard (void)
45055 if (TARGET_SSP_TLS_GUARD)
45057 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
45058 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
45059 tree type = build_qualified_type (type_node, qual);
45062 if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
45064 t = ix86_tls_stack_chk_guard_decl;
45071 (UNKNOWN_LOCATION, VAR_DECL,
45072 get_identifier (ix86_stack_protector_guard_symbol_str),
45074 TREE_STATIC (t) = 1;
45075 TREE_PUBLIC (t) = 1;
45076 DECL_EXTERNAL (t) = 1;
45078 TREE_THIS_VOLATILE (t) = 1;
45079 DECL_ARTIFICIAL (t) = 1;
45080 DECL_IGNORED_P (t) = 1;
/* Do not share RTL as the declaration is visible outside of
   the current function.  */
45085 RTX_FLAG (x, used) = 1;
45087 ix86_tls_stack_chk_guard_decl = t;
45092 tree asptrtype = build_pointer_type (type);
45094 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
45095 t = build2 (MEM_REF, asptrtype, t,
45096 build_int_cst (asptrtype, 0));
45097 TREE_THIS_VOLATILE (t) = 1;
45103 return default_stack_protect_guard ();
45106 /* For 32-bit code we can save PIC register setup by using
45107 __stack_chk_fail_local hidden function instead of calling
__stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
45109 register, so it is better to call __stack_chk_fail directly. */
45111 static tree ATTRIBUTE_UNUSED
45112 ix86_stack_protect_fail (void)
45114 return TARGET_64BIT
45115 ? default_external_stack_protect_fail ()
45116 : default_hidden_stack_protect_fail ();
45119 /* Select a format to encode pointers in exception handling data. CODE
45120 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45121 true if the symbol may be affected by dynamic relocations.
45123 ??? All x86 object file formats are capable of representing this.
45124 After all, the relocation needed is the same as for the call insn.
45125 Whether or not a particular assembler allows us to enter such, I
45126 guess we'll have to see. */
45128 asm_preferred_eh_data_format (int code, int global)
45132 int type = DW_EH_PE_sdata8;
45134 || ix86_cmodel == CM_SMALL_PIC
45135 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45136 type = DW_EH_PE_sdata4;
45137 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45139 if (ix86_cmodel == CM_SMALL
45140 || (ix86_cmodel == CM_MEDIUM && code))
45141 return DW_EH_PE_udata4;
45142 return DW_EH_PE_absptr;
45145 /* Expand copysign from SIGN to the positive value ABS_VALUE
storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */
45149 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45151 machine_mode mode = GET_MODE (sign);
45152 rtx sgn = gen_reg_rtx (mode);
45153 if (mask == NULL_RTX)
45155 machine_mode vmode;
45157 if (mode == SFmode)
45159 else if (mode == DFmode)
45164 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45165 if (!VECTOR_MODE_P (mode))
45167 /* We need to generate a scalar mode mask in this case. */
45168 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45169 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45170 mask = gen_reg_rtx (mode);
45171 emit_insn (gen_rtx_SET (mask, tmp));
45175 mask = gen_rtx_NOT (mode, mask);
45176 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
45177 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
45180 /* Expand fabs (OP0) and return a new rtx that holds the result. The
mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
45184 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45186 machine_mode vmode, mode = GET_MODE (op0);
45189 xa = gen_reg_rtx (mode);
45190 if (mode == SFmode)
45192 else if (mode == DFmode)
45196 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45197 if (!VECTOR_MODE_P (mode))
45199 /* We need to generate a scalar mode mask in this case. */
45200 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45201 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45202 mask = gen_reg_rtx (mode);
45203 emit_insn (gen_rtx_SET (mask, tmp));
45205 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
45213 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45214 swapping the operands if SWAP_OPERANDS is true. The expanded
45215 code is a forward jump to a newly created label in case the
45216 comparison is true. The generated label rtx is returned. */
45217 static rtx_code_label *
45218 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45219 bool swap_operands)
45221 bool unordered_compare = ix86_unordered_fp_compare (code);
45222 rtx_code_label *label;
45226 std::swap (op0, op1);
45228 label = gen_label_rtx ();
45229 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
45230 if (unordered_compare)
45231 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
45232 reg = gen_rtx_REG (CCFPmode, FLAGS_REG);
45233 emit_insn (gen_rtx_SET (reg, tmp));
45234 tmp = gen_rtx_fmt_ee (code, VOIDmode, reg, const0_rtx);
45235 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45236 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45237 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
45238 JUMP_LABEL (tmp) = label;
45243 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45244 using comparison code CODE. Operands are swapped for the comparison if
45245 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
45247 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
45248 bool swap_operands)
45250 rtx (*insn)(rtx, rtx, rtx, rtx);
45251 machine_mode mode = GET_MODE (op0);
45252 rtx mask = gen_reg_rtx (mode);
45255 std::swap (op0, op1);
45257 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
45259 emit_insn (insn (mask, op0, op1,
45260 gen_rtx_fmt_ee (code, mode, op0, op1)));
45264 /* Generate and return a rtx of mode MODE for 2**n where n is the number
45265 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
45267 ix86_gen_TWO52 (machine_mode mode)
45269 REAL_VALUE_TYPE TWO52r;
45272 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
45273 TWO52 = const_double_from_real_value (TWO52r, mode);
45274 TWO52 = force_reg (mode, TWO52);
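/* Worked example of the 2**52 trick used by the expanders below: for
   a double with 0 <= x < 2**52, the sum x + 2**52 has no mantissa
   bits left for the fraction, so the addition itself rounds x to an
   integer in the current rounding mode; subtracting 2**52 back then
   yields rint (x).  E.g. (41.7 + 0x1p52) - 0x1p52 == 42.0.  */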
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
45282 ix86_expand_lround (rtx op0, rtx op1)
45284 /* C code for the stuff we're doing below:
45285 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
45288 machine_mode mode = GET_MODE (op1);
45289 const struct real_format *fmt;
45290 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45293 /* load nextafter (0.5, 0.0) */
45294 fmt = REAL_MODE_FORMAT (mode);
45295 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45296 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
45298 /* adj = copysign (0.5, op1) */
45299 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
45300 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
45302 /* adj = op1 + adj */
45303 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
45305 /* op0 = (imode)adj */
45306 expand_fix (op0, adj, 0);
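/* Why nextafter (0.5, 0.0) instead of 0.5: for the largest double
   below 0.5, adding exactly 0.5 rounds the sum up to 1.0 (it falls
   halfway between 1.0 - 2**-53 and 1.0 and ties go to even), so
   lround would wrongly return 1.  Adding the predecessor of 0.5
   keeps every such input below 1.0.  The same trick is used by
   ix86_expand_round and ix86_expand_round_sse4 below.  */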
/* Expand SSE2 sequence for computing lfloor or lceil
   from OPERAND1 storing into OPERAND0.  */
45312 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
45314 /* C code for the stuff we're doing below (for do_floor):
45316 xi -= (double)xi > op1 ? 1 : 0;
45319 machine_mode fmode = GET_MODE (op1);
45320 machine_mode imode = GET_MODE (op0);
45321 rtx ireg, freg, tmp;
45322 rtx_code_label *label;
45324 /* reg = (long)op1 */
45325 ireg = gen_reg_rtx (imode);
45326 expand_fix (ireg, op1, 0);
45328 /* freg = (double)reg */
45329 freg = gen_reg_rtx (fmode);
45330 expand_float (freg, ireg, 0);
45332 /* ireg = (freg > op1) ? ireg - 1 : ireg */
45333 label = ix86_expand_sse_compare_and_jump (UNLE,
45334 freg, op1, !do_floor);
45335 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
45336 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
45337 emit_move_insn (ireg, tmp);
45339 emit_label (label);
45340 LABEL_NUSES (label) = 1;
45342 emit_move_insn (op0, ireg);
45345 /* Expand rint rounding OPERAND1 and storing the result in OPERAND0. */
45347 ix86_expand_rint (rtx operand0, rtx operand1)
45349 /* C code for the stuff we're doing below:
45350 xa = fabs (operand1);
45351 if (!isless (xa, 2**52))
45354 if (flag_rounding_math)
45356 two52 = copysign (two52, operand1);
45359 xa = xa + two52 - two52;
45360 return copysign (xa, operand1);
45362 machine_mode mode = GET_MODE (operand0);
45363 rtx res, xa, TWO52, two52, mask;
45364 rtx_code_label *label;
45366 res = gen_reg_rtx (mode);
45367 emit_move_insn (res, operand1);
45369 /* xa = abs (operand1) */
45370 xa = ix86_expand_sse_fabs (res, &mask);
45372 /* if (!isless (xa, TWO52)) goto label; */
45373 TWO52 = ix86_gen_TWO52 (mode);
45374 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45377 if (flag_rounding_math)
45379 two52 = gen_reg_rtx (mode);
45380 ix86_sse_copysign_to_positive (two52, TWO52, res, mask);
45384 xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT);
45385 xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT);
45387 ix86_sse_copysign_to_positive (res, xa, res, mask);
45389 emit_label (label);
45390 LABEL_NUSES (label) = 1;
45392 emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
45398 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
45400 /* C code for the stuff we expand below.
45401 double xa = fabs (x), x2;
45402 if (!isless (xa, TWO52))
45404 xa = xa + TWO52 - TWO52;
45405 x2 = copysign (xa, x);
45414 machine_mode mode = GET_MODE (operand0);
45415 rtx xa, TWO52, tmp, one, res, mask;
45416 rtx_code_label *label;
45418 TWO52 = ix86_gen_TWO52 (mode);
45420 /* Temporary for holding the result, initialized to the input
45421 operand to ease control flow. */
45422 res = gen_reg_rtx (mode);
45423 emit_move_insn (res, operand1);
45425 /* xa = abs (operand1) */
45426 xa = ix86_expand_sse_fabs (res, &mask);
45428 /* if (!isless (xa, TWO52)) goto label; */
45429 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45431 /* xa = xa + TWO52 - TWO52; */
45432 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45433 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
45435 /* xa = copysign (xa, operand1) */
45436 ix86_sse_copysign_to_positive (xa, xa, res, mask);
45438 /* generate 1.0 or -1.0 */
45439 one = force_reg (mode,
45440 const_double_from_real_value (do_floor
45441 ? dconst1 : dconstm1, mode));
45443 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
45444 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
45445 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45446 /* We always need to subtract here to preserve signed zero. */
45447 tmp = expand_simple_binop (mode, MINUS,
45448 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45449 emit_move_insn (res, tmp);
45451 emit_label (label);
45452 LABEL_NUSES (label) = 1;
45454 emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
45460 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
45462 /* C code for the stuff we expand below.
45463 double xa = fabs (x), x2;
45464 if (!isless (xa, TWO52))
45466 x2 = (double)(long)x;
45473 if (HONOR_SIGNED_ZEROS (mode))
45474 return copysign (x2, x);
45477 machine_mode mode = GET_MODE (operand0);
45478 rtx xa, xi, TWO52, tmp, one, res, mask;
45479 rtx_code_label *label;
45481 TWO52 = ix86_gen_TWO52 (mode);
45483 /* Temporary for holding the result, initialized to the input
45484 operand to ease control flow. */
45485 res = gen_reg_rtx (mode);
45486 emit_move_insn (res, operand1);
45488 /* xa = abs (operand1) */
45489 xa = ix86_expand_sse_fabs (res, &mask);
45491 /* if (!isless (xa, TWO52)) goto label; */
45492 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45494 /* xa = (double)(long)x */
45495 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45496 expand_fix (xi, res, 0);
45497 expand_float (xa, xi, 0);
45500 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
45502 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
45503 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
45504 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45505 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
45506 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45507 emit_move_insn (res, tmp);
45509 if (HONOR_SIGNED_ZEROS (mode))
45510 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
45512 emit_label (label);
45513 LABEL_NUSES (label) = 1;
45515 emit_move_insn (operand0, res);
45518 /* Expand SSE sequence for computing round from OPERAND1 storing
45519 into OPERAND0. Sequence that works without relying on DImode truncation
45520 via cvttsd2siq that is only available on 64bit targets. */
45522 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
45524 /* C code for the stuff we expand below.
45525 double xa = fabs (x), xa2, x2;
45526 if (!isless (xa, TWO52))
45528 Using the absolute value and copying back sign makes
45529 -0.0 -> -0.0 correct.
45530 xa2 = xa + TWO52 - TWO52;
45535 else if (dxa > 0.5)
45537 x2 = copysign (xa2, x);
45540 machine_mode mode = GET_MODE (operand0);
45541 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
45542 rtx_code_label *label;
45544 TWO52 = ix86_gen_TWO52 (mode);
45546 /* Temporary for holding the result, initialized to the input
45547 operand to ease control flow. */
45548 res = gen_reg_rtx (mode);
45549 emit_move_insn (res, operand1);
45551 /* xa = abs (operand1) */
45552 xa = ix86_expand_sse_fabs (res, &mask);
45554 /* if (!isless (xa, TWO52)) goto label; */
45555 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45557 /* xa2 = xa + TWO52 - TWO52; */
45558 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45559 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
45561 /* dxa = xa2 - xa; */
45562 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
45564 /* generate 0.5, 1.0 and -0.5 */
45565 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
45566 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
45567 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
45571 tmp = gen_reg_rtx (mode);
45572 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
45573 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
45574 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45575 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45576 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
45577 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
45578 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45579 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45581 /* res = copysign (xa2, operand1) */
45582 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
45584 emit_label (label);
45585 LABEL_NUSES (label) = 1;
45587 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
45593 ix86_expand_trunc (rtx operand0, rtx operand1)
45595 /* C code for SSE variant we expand below.
45596 double xa = fabs (x), x2;
45597 if (!isless (xa, TWO52))
45599 x2 = (double)(long)x;
45600 if (HONOR_SIGNED_ZEROS (mode))
45601 return copysign (x2, x);
45604 machine_mode mode = GET_MODE (operand0);
45605 rtx xa, xi, TWO52, res, mask;
45606 rtx_code_label *label;
45608 TWO52 = ix86_gen_TWO52 (mode);
45610 /* Temporary for holding the result, initialized to the input
45611 operand to ease control flow. */
45612 res = gen_reg_rtx (mode);
45613 emit_move_insn (res, operand1);
45615 /* xa = abs (operand1) */
45616 xa = ix86_expand_sse_fabs (res, &mask);
45618 /* if (!isless (xa, TWO52)) goto label; */
45619 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45621 /* x = (double)(long)x */
45622 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45623 expand_fix (xi, res, 0);
45624 expand_float (res, xi, 0);
45626 if (HONOR_SIGNED_ZEROS (mode))
45627 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
45629 emit_label (label);
45630 LABEL_NUSES (label) = 1;
45632 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
45638 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
45640 machine_mode mode = GET_MODE (operand0);
45641 rtx xa, mask, TWO52, one, res, smask, tmp;
45642 rtx_code_label *label;
45644 /* C code for SSE variant we expand below.
45645 double xa = fabs (x), x2;
45646 if (!isless (xa, TWO52))
45648 xa2 = xa + TWO52 - TWO52;
45652 x2 = copysign (xa2, x);
45656 TWO52 = ix86_gen_TWO52 (mode);
45658 /* Temporary for holding the result, initialized to the input
45659 operand to ease control flow. */
45660 res = gen_reg_rtx (mode);
45661 emit_move_insn (res, operand1);
45663 /* xa = abs (operand1) */
45664 xa = ix86_expand_sse_fabs (res, &smask);
45666 /* if (!isless (xa, TWO52)) goto label; */
45667 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45669 /* res = xa + TWO52 - TWO52; */
45670 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45671 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
45672 emit_move_insn (res, tmp);
45675 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
45677 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
45678 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
45679 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
45680 tmp = expand_simple_binop (mode, MINUS,
45681 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
45682 emit_move_insn (res, tmp);
45684 /* res = copysign (res, operand1) */
45685 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
45687 emit_label (label);
45688 LABEL_NUSES (label) = 1;
45690 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
45696 ix86_expand_round (rtx operand0, rtx operand1)
45698 /* C code for the stuff we're doing below:
45699 double xa = fabs (x);
45700 if (!isless (xa, TWO52))
45702 xa = (double)(long)(xa + nextafter (0.5, 0.0));
45703 return copysign (xa, x);
45705 machine_mode mode = GET_MODE (operand0);
45706 rtx res, TWO52, xa, xi, half, mask;
45707 rtx_code_label *label;
45708 const struct real_format *fmt;
45709 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45711 /* Temporary for holding the result, initialized to the input
45712 operand to ease control flow. */
45713 res = gen_reg_rtx (mode);
45714 emit_move_insn (res, operand1);
45716 TWO52 = ix86_gen_TWO52 (mode);
45717 xa = ix86_expand_sse_fabs (res, &mask);
45718 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45720 /* load nextafter (0.5, 0.0) */
45721 fmt = REAL_MODE_FORMAT (mode);
45722 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45723 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
45725 /* xa = xa + 0.5 */
45726 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
45727 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
45729 /* xa = (double)(int64_t)xa */
45730 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45731 expand_fix (xi, xa, 0);
45732 expand_float (xa, xi, 0);
45734 /* res = copysign (xa, operand1) */
45735 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
45737 emit_label (label);
45738 LABEL_NUSES (label) = 1;
45740 emit_move_insn (operand0, res);
45743 /* Expand SSE sequence for computing round
45744 from OP1 storing into OP0 using sse4 round insn. */
45746 ix86_expand_round_sse4 (rtx op0, rtx op1)
45748 machine_mode mode = GET_MODE (op0);
45749 rtx e1, e2, res, half;
45750 const struct real_format *fmt;
45751 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45752 rtx (*gen_copysign) (rtx, rtx, rtx);
45753 rtx (*gen_round) (rtx, rtx, rtx);
45758 gen_copysign = gen_copysignsf3;
45759 gen_round = gen_sse4_1_roundsf2;
45762 gen_copysign = gen_copysigndf3;
45763 gen_round = gen_sse4_1_rounddf2;
45766 gcc_unreachable ();
45769 /* round (a) = trunc (a + copysign (0.5, a)) */
45771 /* load nextafter (0.5, 0.0) */
45772 fmt = REAL_MODE_FORMAT (mode);
45773 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45774 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
45775 half = const_double_from_real_value (pred_half, mode);
45777 /* e1 = copysign (0.5, op1) */
45778 e1 = gen_reg_rtx (mode);
45779 emit_insn (gen_copysign (e1, half, op1));
45781 /* e2 = op1 + e1 */
45782 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
45784 /* res = trunc (e2) */
45785 res = gen_reg_rtx (mode);
45786 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
45788 emit_move_insn (op0, res);
45791 /* Handle fentry_name / fentry_section attribute. */
45794 ix86_handle_fentry_name (tree *node, tree name, tree args,
45795 int, bool *no_add_attrs)
45797 if (TREE_CODE (*node) == FUNCTION_DECL
45798 && TREE_CODE (TREE_VALUE (args)) == STRING_CST)
45799 /* Do nothing else, just set the attribute. We'll get at
45800 it later with lookup_attribute. */
45804 warning (OPT_Wattributes, "%qE attribute ignored", name);
45805 *no_add_attrs = true;
45812 /* Table of valid machine attributes. */
45813 static const struct attribute_spec ix86_attribute_table[] =
45815 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
45816 affects_type_identity, handler, exclude } */
45817 /* Stdcall attribute says callee is responsible for popping arguments
45818 if they are not variable. */
45819 { "stdcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45821 /* Fastcall attribute says callee is responsible for popping arguments
45822 if they are not variable. */
45823 { "fastcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45825 /* Thiscall attribute says callee is responsible for popping arguments
45826 if they are not variable. */
45827 { "thiscall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45829 /* Cdecl attribute says the callee is a normal C declaration */
45830 { "cdecl", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45832 /* Regparm attribute specifies how many integer arguments are to be
45833 passed in registers. */
45834 { "regparm", 1, 1, false, true, true, true, ix86_handle_cconv_attribute,
45836 /* Sseregparm attribute says we are using x86_64 calling conventions
45837 for FP arguments. */
45838 { "sseregparm", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45840 /* The transactional memory builtins are implicitly regparm or fastcall
45841 depending on the ABI. Override the generic do-nothing attribute that
45842 these builtins were declared with. */
45843 { "*tm regparm", 0, 0, false, true, true, true,
45844 ix86_handle_tm_regparm_attribute, NULL },
45845 /* force_align_arg_pointer says this function realigns the stack at entry. */
45846 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
45847 false, true, true, false, ix86_handle_force_align_arg_pointer_attribute,
45849 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
45850 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
45852 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
45854 { "shared", 0, 0, true, false, false, false,
45855 ix86_handle_shared_attribute, NULL },
45857 { "ms_struct", 0, 0, false, false, false, false,
45858 ix86_handle_struct_attribute, NULL },
45859 { "gcc_struct", 0, 0, false, false, false, false,
45860 ix86_handle_struct_attribute, NULL },
45861 #ifdef SUBTARGET_ATTRIBUTE_TABLE
45862 SUBTARGET_ATTRIBUTE_TABLE,
45864 /* ms_abi and sysv_abi calling convention function attributes. */
45865 { "ms_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, NULL },
45866 { "sysv_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute,
45868 { "ms_abi va_list", 0, 0, false, false, false, false, NULL, NULL },
45869 { "sysv_abi va_list", 0, 0, false, false, false, false, NULL, NULL },
45870 { "ms_hook_prologue", 0, 0, true, false, false, false,
45871 ix86_handle_fndecl_attribute, NULL },
45872 { "callee_pop_aggregate_return", 1, 1, false, true, true, true,
45873 ix86_handle_callee_pop_aggregate_return, NULL },
45874 { "interrupt", 0, 0, false, true, true, false,
45875 ix86_handle_interrupt_attribute, NULL },
45876 { "no_caller_saved_registers", 0, 0, false, true, true, false,
45877 ix86_handle_no_caller_saved_registers_attribute, NULL },
45878 { "naked", 0, 0, true, false, false, false,
45879 ix86_handle_fndecl_attribute, NULL },
45880 { "indirect_branch", 1, 1, true, false, false, false,
45881 ix86_handle_fndecl_attribute, NULL },
45882 { "function_return", 1, 1, true, false, false, false,
45883 ix86_handle_fndecl_attribute, NULL },
45884 { "indirect_return", 0, 0, false, true, true, false,
45886 { "fentry_name", 1, 1, true, false, false, false,
45887 ix86_handle_fentry_name, NULL },
45888 { "fentry_section", 1, 1, true, false, false, false,
45889 ix86_handle_fentry_name, NULL },
45890 { "cf_check", 0, 0, true, false, false, false,
45891 ix86_handle_fndecl_attribute, NULL },
45894 { NULL, 0, 0, false, false, false, false, NULL, NULL }
45897 /* Implement targetm.vectorize.builtin_vectorization_cost. */
45899 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
45903 machine_mode mode = TImode;
45905 if (vectype != NULL)
45907 fp = FLOAT_TYPE_P (vectype);
45908 mode = TYPE_MODE (vectype);
45911 switch (type_of_cost)
45914 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
/* Load/store costs are relative to a register move, which costs 2.
   Recompute them in COSTS_N_INSNS units so everything has the same base.  */
45919 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
45920 : ix86_cost->int_load [2]) / 2;
45923 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
45924 : ix86_cost->int_store [2]) / 2;
45927 return ix86_vec_cost (mode,
45928 fp ? ix86_cost->addss : ix86_cost->sse_op);
45931 index = sse_store_index (mode);
/* See PR82713 - we may end up being called on a non-vector type.  */
45935 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
45938 index = sse_store_index (mode);
/* See PR82713 - we may end up being called on a non-vector type.  */
45942 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
45944 case vec_to_scalar:
45945 case scalar_to_vec:
45946 return ix86_vec_cost (mode, ix86_cost->sse_op);
45948 /* We should have separate costs for unaligned loads and gather/scatter.
45949 Do that incrementally. */
45950 case unaligned_load:
45951 index = sse_store_index (mode);
/* See PR82713 - we may end up being called on a non-vector type.  */
45955 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
45957 case unaligned_store:
45958 index = sse_store_index (mode);
/* See PR82713 - we may end up being called on a non-vector type.  */
45962 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
45964 case vector_gather_load:
45965 return ix86_vec_cost (mode,
45967 (ix86_cost->gather_static
45968 + ix86_cost->gather_per_elt
45969 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
45971 case vector_scatter_store:
45972 return ix86_vec_cost (mode,
45974 (ix86_cost->scatter_static
45975 + ix86_cost->scatter_per_elt
45976 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
45978 case cond_branch_taken:
45979 return ix86_cost->cond_taken_branch_cost;
45981 case cond_branch_not_taken:
45982 return ix86_cost->cond_not_taken_branch_cost;
45985 case vec_promote_demote:
45986 return ix86_vec_cost (mode, ix86_cost->sse_op);
45988 case vec_construct:
45990 /* N element inserts into SSE vectors. */
45991 int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
45992 /* One vinserti128 for combining two SSE vectors for AVX256. */
45993 if (GET_MODE_BITSIZE (mode) == 256)
45994 cost += ix86_vec_cost (mode, ix86_cost->addss);
45995 /* One vinserti64x4 and two vinserti128 for combining SSE
45996 and AVX256 vectors to AVX512. */
45997 else if (GET_MODE_BITSIZE (mode) == 512)
45998 cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
46003 gcc_unreachable ();
46007 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46008 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46009 insn every time. */
46011 static GTY(()) rtx_insn *vselect_insn;
46013 /* Initialize vselect_insn. */
46016 init_vselect_insn (void)
46021 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46022 for (i = 0; i < MAX_VECT_LEN; ++i)
46023 XVECEXP (x, 0, i) = const0_rtx;
46024 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46026 x = gen_rtx_SET (const0_rtx, x);
46028 vselect_insn = emit_insn (x);
46032 /* Construct (set target (vec_select op0 (parallel perm))) and
46033 return true if that's a valid instruction in the active ISA. */
46036 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46037 unsigned nelt, bool testing_p)
46040 rtx x, save_vconcat;
46043 if (vselect_insn == NULL_RTX)
46044 init_vselect_insn ();
46046 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46047 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46048 for (i = 0; i < nelt; ++i)
46049 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46050 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46051 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46052 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46053 SET_DEST (PATTERN (vselect_insn)) = target;
46054 icode = recog_memoized (vselect_insn);
46056 if (icode >= 0 && !testing_p)
46057 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46059 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46060 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46061 INSN_CODE (vselect_insn) = -1;
46066 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46069 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46070 const unsigned char *perm, unsigned nelt,
46073 machine_mode v2mode;
46077 if (vselect_insn == NULL_RTX)
46078 init_vselect_insn ();
46080 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
46082 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46083 PUT_MODE (x, v2mode);
46086 ok = expand_vselect (target, x, perm, nelt, testing_p);
46087 XEXP (x, 0) = const0_rtx;
46088 XEXP (x, 1) = const0_rtx;
46092 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46093 using movss or movsd. */
46095 expand_vec_perm_movs (struct expand_vec_perm_d *d)
46097 machine_mode vmode = d->vmode;
46098 unsigned i, nelt = d->nelt;
46101 if (d->one_operand_p)
46104 if (!(TARGET_SSE && vmode == V4SFmode)
46105 && !(TARGET_SSE2 && vmode == V2DFmode))
46108 /* Only the first element is changed. */
46109 if (d->perm[0] != nelt && d->perm[0] != 0)
46111 for (i = 1; i < nelt; ++i)
46112 if (d->perm[i] != i + nelt - d->perm[0])
46118 if (d->perm[0] == nelt)
46119 x = gen_rtx_VEC_MERGE (vmode, d->op1, d->op0, GEN_INT (1));
46121 x = gen_rtx_VEC_MERGE (vmode, d->op0, d->op1, GEN_INT (1));
46123 emit_insn (gen_rtx_SET (d->target, x));
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  machine_mode mmode, vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, maskop, x;
  rtx rperm[32], vperm;

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
      && (TARGET_AVX512BW
          || GET_MODE_UNIT_SIZE (vmode) >= 4))
    ;
  else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
        return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case E_V8DFmode:
    case E_V16SFmode:
    case E_V4DFmode:
    case E_V8SFmode:
    case E_V2DFmode:
    case E_V4SFmode:
    case E_V8HImode:
    case E_V8SImode:
    case E_V32HImode:
    case E_V64QImode:
    case E_V16SImode:
    case E_V8DImode:
      for (i = 0; i < nelt; ++i)
        mask |= (d->perm[i] >= nelt) << i;
      break;

    case E_V2DImode:
      for (i = 0; i < 2; ++i)
        mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case E_V4SImode:
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case E_V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
         an immediate argument, rather than pblendvb with a vector
         argument.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          {
          use_pblendvb:
            for (i = 0; i < nelt; ++i)
              rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

          finish_pblendvb:
            vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
            vperm = force_reg (vmode, vperm);

            if (GET_MODE_SIZE (vmode) == 16)
              emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
            else
              emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
            if (target != d->target)
              emit_move_insn (d->target, gen_lowpart (d->vmode, target));
            return true;
          }

      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_reg_rtx (vmode);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case E_V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
         with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
        if (d->perm[i] + 2 != d->perm[i + 2])
          break;
      if (i < 32)
        {
          /* See if bytes move the same in both lanes.  If yes,
             vpblendw with immediate can be used.  */
          for (i = 0; i < 16; i += 2)
            if (d->perm[i] + 16 != d->perm[i + 16])
              goto use_pblendvb;

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i * 2] >= 32) << i;
          vmode = V16HImode;
          goto do_subreg;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case E_V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          break;
      if (i < 16)
        {
          /* See if words move the same in both lanes.  If not,
             vpblendvb must be used.  */
          for (i = 0; i < 8; i++)
            if (d->perm[i] + 8 != d->perm[i + 8])
              {
                /* Use vpblendvb.  */
                for (i = 0; i < 32; ++i)
                  rperm[i] = (d->perm[i / 2] < 16
                              ? const0_rtx : constm1_rtx);

                vmode = V32QImode;
                nelt = 32;
                target = gen_reg_rtx (vmode);
                op0 = gen_lowpart (vmode, op0);
                op1 = gen_lowpart (vmode, op1);
                goto finish_pblendvb;
              }

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i] >= 16) << i;
          break;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case E_V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  switch (vmode)
    {
    case E_V8DFmode:
      mmode = QImode;
      break;
    case E_V16SFmode:
      mmode = HImode;
      break;
    case E_V8DImode:
      mmode = QImode;
      break;
    case E_V16SImode:
      mmode = HImode;
      break;
    case E_V32HImode:
      mmode = SImode;
      break;
    case E_V64QImode:
      mmode = DImode;
      break;
    default:
      mmode = VOIDmode;
    }

  if (mmode != VOIDmode)
    maskop = force_reg (mmode, gen_int_mode (mask, mmode));
  else
    maskop = GEN_INT (mask);

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
  x = gen_rtx_SET (target, x);
  emit_insn (x);
  if (target != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, target));

  return true;
}
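
/* Worked example: the V8HImode permutation { 0, 9, 2, 11, 4, 13, 6, 15 }
   keeps every element in its lane (each entry is i or i + 8), so the
   first switch above accumulates mask |= (perm[i] >= 8) << i, giving
   0xaa, and the VEC_MERGE becomes a single pblendw with that
   immediate.  */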
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
        return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
         from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
        e -= (8 + 4);
      else if (e >= 4)
        e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
  return true;
}
/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

static bool
valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
        if (d->perm[i] + j != d->perm[i + j])
          return false;

  return true;
}
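
/* E.g. the V16QImode permutation { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14,
   15, 8, 9, 10, 11 } is valid for vmode == V4SImode: chunk == 4, every
   perm[i] at a chunk boundary is a multiple of 4, and each chunk is
   consecutive, so it is really the V4SImode permutation { 1, 0, 3, 2 }
   in disguise.  */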
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[64];
  machine_mode vmode = V16QImode;
  rtx rperm[64], vperm, target, op0, op1;

  nelt = d->nelt;

  if (!d->one_operand_p)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
        {
          if (TARGET_AVX2
              && valid_perm_using_mode_p (V2TImode, d))
            {
              if (d->testing_p)
                return true;

              /* Use vperm2i128 insn.  The pattern uses
                 V4DImode instead of V2TImode.  */
              target = d->target;
              if (d->vmode != V4DImode)
                target = gen_reg_rtx (V4DImode);
              op0 = gen_lowpart (V4DImode, d->op0);
              op1 = gen_lowpart (V4DImode, d->op1);
              rperm[0]
                = GEN_INT ((d->perm[0] / (nelt / 2))
                           | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
              emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
              if (target != d->target)
                emit_move_insn (d->target, gen_lowpart (d->vmode, target));
              return true;
            }
          return false;
        }
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
        {
          if (!TARGET_SSSE3)
            return false;
        }
      else if (GET_MODE_SIZE (d->vmode) == 32)
        {
          if (!TARGET_AVX2)
            return false;

          /* V4DImode should be already handled through
             expand_vselect by vpermq instruction.  */
          gcc_assert (d->vmode != V4DImode);

          vmode = V32QImode;
          if (d->vmode == V8SImode
              || d->vmode == V16HImode
              || d->vmode == V32QImode)
            {
              /* First see if vpermq can be used for
                 V8SImode/V16HImode/V32QImode.  */
              if (valid_perm_using_mode_p (V4DImode, d))
                {
                  for (i = 0; i < 4; i++)
                    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
                  if (d->testing_p)
                    return true;
                  target = gen_reg_rtx (V4DImode);
                  if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
                                      perm, 4, false))
                    {
                      emit_move_insn (d->target,
                                      gen_lowpart (d->vmode, target));
                      return true;
                    }
                  return false;
                }

              /* Next see if vpermd can be used.  */
              if (valid_perm_using_mode_p (V8SImode, d))
                vmode = V8SImode;
            }
          /* Or if vpermps can be used.  */
          else if (d->vmode == V8SFmode)
            vmode = V8SImode;

          if (vmode == V32QImode)
            {
              /* vpshufb only works intra lanes, it is not
                 possible to shuffle bytes in between the lanes.  */
              for (i = 0; i < nelt; ++i)
                if ((d->perm[i] ^ i) & (nelt / 2))
                  return false;
            }
        }
      else if (GET_MODE_SIZE (d->vmode) == 64)
        {
          if (!TARGET_AVX512BW)
            return false;

          /* If vpermq didn't work, vpshufb won't work either.  */
          if (d->vmode == V8DFmode || d->vmode == V8DImode)
            return false;

          vmode = V64QImode;
          if (d->vmode == V16SImode
              || d->vmode == V32HImode
              || d->vmode == V64QImode)
            {
              /* First see if vpermq can be used for
                 V16SImode/V32HImode/V64QImode.  */
              if (valid_perm_using_mode_p (V8DImode, d))
                {
                  for (i = 0; i < 8; i++)
                    perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
                  if (d->testing_p)
                    return true;
                  target = gen_reg_rtx (V8DImode);
                  if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
                                      perm, 8, false))
                    {
                      emit_move_insn (d->target,
                                      gen_lowpart (d->vmode, target));
                      return true;
                    }
                  return false;
                }

              /* Next see if vpermd can be used.  */
              if (valid_perm_using_mode_p (V16SImode, d))
                vmode = V16SImode;
            }
          /* Or if vpermps can be used.  */
          else if (d->vmode == V16SFmode)
            vmode = V16SImode;
          if (vmode == V64QImode)
            {
              /* vpshufb only works intra lanes, it is not
                 possible to shuffle bytes in between the lanes.  */
              for (i = 0; i < nelt; ++i)
                if ((d->perm[i] ^ i) & (nelt / 4))
                  return false;
            }
        }
      else
        return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else if (vmode == V16SImode)
    for (i = 0; i < 16; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
  else
    {
      eltsz = GET_MODE_UNIT_SIZE (d->vmode);
      if (!d->one_operand_p)
        mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
        mask = nelt - 1;
      else if (vmode == V64QImode)
        mask = nelt / 4 - 1;
      else
        mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
        {
          unsigned j, e = d->perm[i] & mask;
          for (j = 0; j < eltsz; ++j)
            rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
        }
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
                                gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = d->target;
  if (d->vmode != vmode)
    target = gen_reg_rtx (vmode);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->one_operand_p)
    {
      if (vmode == V16QImode)
        emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
        emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else if (vmode == V64QImode)
        emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
      else if (vmode == V8SFmode)
        emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
      else if (vmode == V8SImode)
        emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
      else if (vmode == V16SFmode)
        emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
      else if (vmode == V16SImode)
        emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
      else
        gcc_unreachable ();
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  if (target != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, target));
  return true;
}
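
/* For the byte shuffles above, the control vector is built by expanding
   each element index into eltsz consecutive byte indexes.  E.g. for a
   one-operand V8HImode permutation with d->perm[0] == 3, eltsz == 2 and
   mask == 7, the first two control bytes become 6 and 7, selecting the
   two bytes of word 3 for the low word of the result.  */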
/* For V*[QHS]Imode permutations, check if the same permutation
   can't be performed in a 2x, 4x or 8x wider inner mode.  */

static bool
canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
                              struct expand_vec_perm_d *nd)
{
  int i;
  machine_mode mode = VOIDmode;

  switch (d->vmode)
    {
    case E_V16QImode: mode = V8HImode; break;
    case E_V32QImode: mode = V16HImode; break;
    case E_V64QImode: mode = V32HImode; break;
    case E_V8HImode: mode = V4SImode; break;
    case E_V16HImode: mode = V8SImode; break;
    case E_V32HImode: mode = V16SImode; break;
    case E_V4SImode: mode = V2DImode; break;
    case E_V8SImode: mode = V4DImode; break;
    case E_V16SImode: mode = V8DImode; break;
    default: return false;
    }
  for (i = 0; i < d->nelt; i += 2)
    if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
      return false;
  nd->vmode = mode;
  nd->nelt = d->nelt / 2;
  for (i = 0; i < nd->nelt; i++)
    nd->perm[i] = d->perm[2 * i] / 2;
  if (GET_MODE_INNER (mode) != DImode)
    canonicalize_vector_int_perm (nd, nd);
  if (nd != d)
    {
      nd->one_operand_p = d->one_operand_p;
      nd->testing_p = d->testing_p;
      if (d->op0 == d->op1)
        nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
      else
        {
          nd->op0 = gen_lowpart (nd->vmode, d->op0);
          nd->op1 = gen_lowpart (nd->vmode, d->op1);
        }
      if (d->testing_p)
        nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
      else
        nd->target = gen_reg_rtx (nd->vmode);
    }
  return true;
}
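
/* E.g. the V16QImode permutation { 2, 3, 0, 1, 6, 7, 4, 5, ... } pairs
   neighbouring bytes (each perm[i] is even and perm[i + 1] == perm[i]
   + 1), so it is narrowed to the V8HImode permutation { 1, 0, 3, 2, ... },
   and the recursion then tries to widen that further towards DImode
   elements.  */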
/* Try to expand one-operand permutation with constant mask.  */

static bool
ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d)
{
  machine_mode mode = GET_MODE (d->op0);
  machine_mode maskmode = mode;
  rtx (*gen) (rtx, rtx, rtx) = NULL;
  rtx target, op0, mask;
  rtx vec[64];

  if (!rtx_equal_p (d->op0, d->op1))
    return false;

  if (!TARGET_AVX512F)
    return false;

  switch (mode)
    {
    case E_V16SImode:
      gen = gen_avx512f_permvarv16si;
      break;
    case E_V16SFmode:
      gen = gen_avx512f_permvarv16sf;
      maskmode = V16SImode;
      break;
    case E_V8DImode:
      gen = gen_avx512f_permvarv8di;
      break;
    case E_V8DFmode:
      gen = gen_avx512f_permvarv8df;
      maskmode = V8DImode;
      break;
    default:
      return false;
    }

  target = d->target;
  op0 = d->op0;
  for (int i = 0; i < d->nelt; ++i)
    vec[i] = GEN_INT (d->perm[i]);
  mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
  emit_insn (gen (target, op0, force_reg (maskmode, mask)));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  struct expand_vec_perm_d nd;

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->one_operand_p)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
        {
          nd.perm[i] = d->perm[i] & mask;
          if (nd.perm[i] != i)
            identity_perm = false;
          if (nd.perm[i])
            broadcast_perm = false;
        }

      if (identity_perm)
        {
          if (!d->testing_p)
            emit_move_insn (d->target, d->op0);
          return true;
        }
      else if (broadcast_perm && TARGET_AVX2)
        {
          /* Use vpbroadcast{b,w,d}.  */
          rtx (*gen) (rtx, rtx) = NULL;
          switch (d->vmode)
            {
            case E_V64QImode:
              if (TARGET_AVX512BW)
                gen = gen_avx512bw_vec_dupv64qi_1;
              break;
            case E_V32QImode:
              gen = gen_avx2_pbroadcastv32qi_1;
              break;
            case E_V32HImode:
              if (TARGET_AVX512BW)
                gen = gen_avx512bw_vec_dupv32hi_1;
              break;
            case E_V16HImode:
              gen = gen_avx2_pbroadcastv16hi_1;
              break;
            case E_V16SImode:
              if (TARGET_AVX512F)
                gen = gen_avx512f_vec_dupv16si_1;
              break;
            case E_V8SImode:
              gen = gen_avx2_pbroadcastv8si_1;
              break;
            case E_V16QImode:
              gen = gen_avx2_pbroadcastv16qi;
              break;
            case E_V8HImode:
              gen = gen_avx2_pbroadcastv8hi;
              break;
            case E_V16SFmode:
              if (TARGET_AVX512F)
                gen = gen_avx512f_vec_dupv16sf_1;
              break;
            case E_V8SFmode:
              gen = gen_avx2_vec_dupv8sf_1;
              break;
            case E_V8DFmode:
              if (TARGET_AVX512F)
                gen = gen_avx512f_vec_dupv8df_1;
              break;
            case E_V8DImode:
              if (TARGET_AVX512F)
                gen = gen_avx512f_vec_dupv8di_1;
              break;
            /* For other modes prefer other shuffles this function creates.  */
            default: break;
            }
          if (gen != NULL)
            {
              if (!d->testing_p)
                emit_insn (gen (d->target, d->op0));
              return true;
            }
        }

      if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
        return true;

      /* There are plenty of patterns in sse.md that are written for
         SEL+CONCAT and are not replicated for a single op.  Perhaps
         that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
         every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
        {
          nd.perm[i] = d->perm[i] & mask;
          nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
        }
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
                                  d->testing_p))
        return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
        {
          for (i = 0; i < nelt; i += 4)
            {
              nd.perm[i + 0] = d->perm[i + 0] & mask;
              nd.perm[i + 1] = d->perm[i + 1] & mask;
              nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
              nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
            }
          if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
                                      d->testing_p))
            return true;
        }
    }

  /* Try movss/movsd instructions.  */
  if (expand_vec_perm_movs (d))
    return true;

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
                              d->testing_p))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          nd.perm[i] = e;
        }
      if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
                                  d->testing_p))
        return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd, vpermps or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  /* Try the AVX2 vpalignr instruction.  */
  if (expand_vec_perm_palignr (d, true))
    return true;

  /* Try the AVX512F vperm{s,d} instructions.  */
  if (ix86_expand_vec_one_operand_perm_avx512 (d))
    return true;

  /* Try the AVX512F vpermt2/vpermi2 instructions.  */
  if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
    return true;

  /* See if we can get the same permutation in different vector integer
     mode.  */
  if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
    {
      if (!d->testing_p)
        emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
      return true;
    }
  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || !d->one_operand_p)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
  gcc_assert (ok);

  return true;
}
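
/* E.g. the V8HImode permutation { 2, 0, 3, 1, 7, 5, 6, 4 } keeps the low
   four words in the low half and the high four in the high half, so it
   is expanded as pshuflw { 2, 0, 3, 1, 4, 5, 6, 7 } followed by pshufhw
   { 0, 1, 2, 3, 7, 5, 6, 4 } on the intermediate result.  */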
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  If SINGLE_INSN_ONLY_P, succeed if only
   the vpalignr instruction itself can perform the requested permutation.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max, minswap, maxswap;
  bool in_order, ok, swap = false;
  rtx shift, target;
  struct expand_vec_perm_d dcopy;

  /* Even with AVX, palignr only operates on 128-bit vectors,
     in AVX2 palignr operates on both 128-bit lanes.  */
  if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
      && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
    return false;

  min = 2 * nelt;
  max = 0;
  minswap = 2 * nelt;
  maxswap = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      unsigned eswap = d->perm[i] ^ nelt;
      if (GET_MODE_SIZE (d->vmode) == 32)
        {
          e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
          eswap = e ^ (nelt / 2);
        }
      if (e < min)
        min = e;
      if (e > max)
        max = e;
      if (eswap < minswap)
        minswap = eswap;
      if (eswap > maxswap)
        maxswap = eswap;
    }
  if (min == 0
      || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
    {
      if (d->one_operand_p
          || minswap == 0
          || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
                                   ? nelt / 2 : nelt))
        return false;
      swap = true;
      min = minswap;
      max = maxswap;
    }

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb for
     128-bit vectors.  If SINGLE_INSN_ONLY_P, in_order has to be computed
     first.  */
  if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
    return true;

  dcopy = *d;
  if (swap)
    {
      dcopy.op0 = d->op1;
      dcopy.op1 = d->op0;
      for (i = 0; i < nelt; ++i)
        dcopy.perm[i] ^= nelt;
    }

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = dcopy.perm[i];
      if (GET_MODE_SIZE (d->vmode) == 32
          && e >= nelt
          && (e & (nelt / 2 - 1)) < min)
        e = e - min - (nelt / 2);
      else
        e = e - min;
      if (e != i)
        in_order = false;
      dcopy.perm[i] = e;
    }
  dcopy.one_operand_p = true;

  if (single_insn_only_p && !in_order)
    return false;

  /* For AVX2, test whether we can permute the result in one instruction.  */
  if (d->testing_p)
    {
      if (in_order)
        return true;
      dcopy.op1 = dcopy.op0;
      return expand_vec_perm_1 (&dcopy);
    }

  shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      target = gen_reg_rtx (TImode);
      emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
                                      gen_lowpart (TImode, dcopy.op0), shift));
    }
  else
    {
      target = gen_reg_rtx (V2TImode);
      emit_insn (gen_avx2_palignrv2ti (target,
                                       gen_lowpart (V2TImode, dcopy.op1),
                                       gen_lowpart (V2TImode, dcopy.op0),
                                       shift));
    }

  dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    {
      emit_move_insn (d->target, dcopy.op0);
      return true;
    }

  ok = expand_vec_perm_1 (&dcopy);
  gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);

  return ok;
}
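
/* E.g. for V16QImode with d->perm[i] == i + 3 (bytes 3 ... 18 of the
   two operands), min == 3 and max - min < nelt, so a single palignr by
   3 bytes suffices; the remapped permutation is then the identity and
   the degenerate case above emits just the move.  */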
/* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
   the permutation using the SSE4_1 pblendv instruction.  Potentially
   reduces permutation from 2 pshufb and or to 1 pshufb and pblendv.  */

static bool
expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
{
  unsigned i, which, nelt = d->nelt;
  struct expand_vec_perm_d dcopy, dcopy1;
  machine_mode vmode = d->vmode;
  bool ok;

  /* Use the same checks as in expand_vec_perm_blend.  */
  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* Figure out where permutation elements stay not in their
     respective lanes.  */
  for (i = 0, which = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e != i)
        which |= (e < nelt ? 1 : 2);
    }
  /* We can pblend the part where elements stay not in their
     respective lanes only when these elements are all in one
     half of a permutation.
     {0 1 8 3 4 5 9 7} is ok, as 8 and 9 are not in their respective
     lanes, but both are >= 8.
     {0 1 8 3 4 5 2 7} is not ok, as 2 and 8 are not in their
     respective lanes and 8 >= 8, but 2 is not.  */
  if (which != 1 && which != 2)
    return false;
  if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
    return true;

  /* First we apply one operand permutation to the part where
     elements stay not in their respective lanes.  */
  dcopy = *d;
  if (which == 2)
    dcopy.op0 = dcopy.op1 = d->op1;
  else
    dcopy.op0 = dcopy.op1 = d->op0;
  if (!d->testing_p)
    dcopy.target = gen_reg_rtx (vmode);
  dcopy.one_operand_p = true;

  for (i = 0; i < nelt; ++i)
    dcopy.perm[i] = d->perm[i] & (nelt - 1);

  ok = expand_vec_perm_1 (&dcopy);
  if (GET_MODE_SIZE (vmode) != 16 && !ok)
    return false;
  else
    gcc_assert (ok);
  if (d->testing_p)
    return true;

  /* Next we put permuted elements into their positions.  */
  dcopy1 = *d;
  if (which == 2)
    dcopy1.op1 = dcopy.target;
  else
    dcopy1.op0 = dcopy.target;

  for (i = 0; i < nelt; ++i)
    dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);

  ok = expand_vec_perm_blend (&dcopy1);
  gcc_assert (ok);

  return ok;
}
static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx_insn *seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->one_operand_p)
        return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
        return false;
      /* For 32-byte modes allow even d->one_operand_p.
         The lack of cross-lane shuffling in some instructions
         might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
         a 3 insn sequence, give up and let it be expanded as
         3 insn sequence.  While that is one insn longer,
         it doesn't need a memory operand and in the common
         case that both interleave low and high permutations
         with the same operands are adjacent needs 4 insns
         for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
        return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= HOST_WIDE_INT_1U << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
         for interleave high.  If the elements are from mis-matched halves, we
         can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
        {
          /* punpckl* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h2 | h4)) == contents)
        {
          /* punpckh* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i * 2;
              remap[i + nelt + nelt2] = i * 2 + 1;
              dremap.perm[i * 2] = i + nelt2;
              dremap.perm[i * 2 + 1] = i + nelt + nelt2;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h1 | h4)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i;
              remap[i + nelt + nelt2] = i + nelt2;
              dremap.perm[i] = i;
              dremap.perm[i + nelt2] = i + nelt + nelt2;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 0;
              dremap.perm[1] = 3;
            }
        }
      else if ((contents & (h2 | h3)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i;
              remap[i + nelt] = i + nelt2;
              dremap.perm[i] = i + nelt2;
              dremap.perm[i + nelt2] = i + nelt;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 1;
              dremap.perm[1] = 2;
            }
        }
      else
        return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
      for (i = 1; i < 8; ++i)
        q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
        if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
          {
            nonzero_halves[nzcnt] = i;
            ++nzcnt;
          }

      if (nzcnt == 1)
        {
          gcc_assert (d->one_operand_p);
          nonzero_halves[1] = nonzero_halves[0];
          same_halves = true;
        }
      else if (d->one_operand_p)
        {
          gcc_assert (nonzero_halves[0] == 0);
          gcc_assert (nonzero_halves[1] == 1);
        }

      if (nzcnt <= 2)
        {
          if (d->perm[0] / nelt2 == nonzero_halves[1])
            {
              /* Attempt to increase the likelihood that dfinal
                 shuffle will be intra-lane.  */
              std::swap (nonzero_halves[0], nonzero_halves[1]);
            }

          /* vperm2f128 or vperm2i128.  */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
              remap[i + nonzero_halves[0] * nelt2] = i;
              dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
              dremap.perm[i] = i + nonzero_halves[0] * nelt2;
            }

          if (d->vmode != V8SFmode
              && d->vmode != V4DFmode
              && d->vmode != V8SImode)
            {
              dremap.vmode = V8SImode;
              dremap.nelt = 8;
              for (i = 0; i < 4; ++i)
                {
                  dremap.perm[i] = i + nonzero_halves[0] * 4;
                  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
                }
            }
        }
      else if (d->one_operand_p)
        return false;
      else if (TARGET_AVX2
               && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
        {
          /* vpunpckl* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              remap[i + nelt2] = i * 2 + nelt2;
              remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
              dremap.perm[i * 2 + nelt2] = i + nelt2;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
            }
        }
      else if (TARGET_AVX2
               && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
        {
          /* vpunpckh* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i + nelt4] = i * 2;
              remap[i + nelt + nelt4] = i * 2 + 1;
              remap[i + nelt2 + nelt4] = i * 2 + nelt2;
              remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i + nelt4;
              dremap.perm[i * 2 + 1] = i + nelt + nelt4;
              dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
            }
        }
      else
        return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
         same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
        {
          gcc_assert (e < nelt2);
          dfinal.perm[i] = e + nelt2;
        }
      else
        dfinal.perm[i] = e;
    }
  if (!d->testing_p)
    {
      dremap.target = gen_reg_rtx (dremap.vmode);
      dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
    }
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
        && (d->vmode == V32QImode || d->vmode == V16HImode)
        && d->one_operand_p))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
          return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  dremap.one_operand_p = true;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0)
          dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
        dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
        j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
        ;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
        dfinal.perm[i] |= nelt4;
      else
        gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
   a vector permutation using two instructions, vperm2f128 resp.
   vperm2i128 followed by any single in-lane permutation.  */

static bool
expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
  bool ok;

  if (!TARGET_AVX
      || GET_MODE_SIZE (d->vmode) != 32
      || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
    return false;

  dsecond = *d;
  dsecond.one_operand_p = false;
  dsecond.testing_p = true;

  /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
     immediate.  For perm < 16 the second permutation uses
     d->op0 as first operand, for perm >= 16 it uses d->op1
     as first operand.  The second operand is the result of
     vperm2[fi]128.  */
  for (perm = 0; perm < 32; perm++)
    {
      /* Ignore permutations which do not move anything cross-lane.  */
      if (perm < 16)
        {
          /* The second shuffle for e.g. V4DFmode has
             0123 and ABCD operands.
             Ignore AB23, as 23 is already in the second lane
             of the first operand.  */
          if ((perm & 0xc) == (1 << 2)) continue;
          /* And 01CD, as 01 is in the first lane of the first
             operand.  */
          if ((perm & 3) == 0) continue;
          /* And 4567, as then the vperm2[fi]128 doesn't change
             anything on the original 4567 second operand.  */
          if ((perm & 0xf) == ((3 << 2) | 2)) continue;
        }
      else
        {
          /* The second shuffle for e.g. V4DFmode has
             4567 and ABCD operands.
             Ignore AB67, as 67 is already in the second lane
             of the first operand.  */
          if ((perm & 0xc) == (3 << 2)) continue;
          /* And 45CD, as 45 is in the first lane of the first
             operand.  */
          if ((perm & 3) == 2) continue;
          /* And 0123, as then the vperm2[fi]128 doesn't change
             anything on the original 0123 first operand.  */
          if ((perm & 0xf) == (1 << 2)) continue;
        }

      for (i = 0; i < nelt; i++)
        {
          j = d->perm[i] / nelt2;
          if (j == ((perm >> (2 * (i >= nelt2))) & 3))
            dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
          else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
            dsecond.perm[i] = d->perm[i] & (nelt - 1);
          else
            break;
        }

      if (i == nelt)
        {
          start_sequence ();
          ok = expand_vec_perm_1 (&dsecond);
          end_sequence ();
        }
      else
        ok = false;

      if (ok)
        {
          if (d->testing_p)
            return true;

          /* Found a usable second shuffle.  dfirst will be
             vperm2f128 on d->op0 and d->op1.  */
          dsecond.testing_p = false;
          dfirst = *d;
          dfirst.target = gen_reg_rtx (d->vmode);
          for (i = 0; i < nelt; i++)
            dfirst.perm[i] = (i & (nelt2 - 1))
                             + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;

          canonicalize_perm (&dfirst);
          ok = expand_vec_perm_1 (&dfirst);
          gcc_assert (ok);

          /* And dsecond is some single insn shuffle, taking
             d->op0 and result of vperm2f128 (if perm < 16) or
             d->op1 and result of vperm2f128 (otherwise).  */
          if (perm >= 16)
            dsecond.op0 = dsecond.op1;
          dsecond.op1 = dfirst.target;

          ok = expand_vec_perm_1 (&dsecond);
          gcc_assert (ok);

          return true;
        }

      /* For one operand, the only useful vperm2f128 permutation is 0x01
         aka lanes swap.  */
      if (d->one_operand_p)
        return false;
    }

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
        || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V32QImode:
      gen = d->perm[0] ? gen_vec_interleave_highv32qi
                       : gen_vec_interleave_lowv32qi;
      break;
    case E_V16HImode:
      gen = d->perm[0] ? gen_vec_interleave_highv16hi
                       : gen_vec_interleave_lowv16hi;
      break;
    case E_V8SImode:
      gen = d->perm[0] ? gen_vec_interleave_highv8si
                       : gen_vec_interleave_lowv8si;
      break;
    case E_V4DImode:
      gen = d->perm[0] ? gen_vec_interleave_highv4di
                       : gen_vec_interleave_lowv4di;
      break;
    case E_V8SFmode:
      gen = d->perm[0] ? gen_vec_interleave_highv8sf
                       : gen_vec_interleave_lowv8sf;
      break;
    case E_V4DFmode:
      gen = d->perm[0] ? gen_vec_interleave_highv4df
                       : gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

static bool
expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
  rtx_insn *seq;
  bool ok;
  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

  if (!TARGET_AVX
      || TARGET_AVX2
      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || !d->one_operand_p)
    return false;

  dfirst = *d;
  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)
    {
      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
        return false;
      dfirst.perm[j] = d->perm[i];
      if (j != i)
        msk |= (1 << i);
    }
  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  if (!d->testing_p)
    dfirst.target = gen_reg_rtx (dfirst.vmode);

  start_sequence ();
  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  emit_insn (seq);

  dsecond = *d;
  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.one_operand_p = true;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);
  gcc_assert (ok);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
   permutation using two vperm2f128, followed by a vshufpd insn blending
   the two vectors together.  */

static bool
expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond, dthird;
  bool ok;

  if (!TARGET_AVX || (d->vmode != V4DFmode))
    return false;

  if (d->testing_p)
    return true;

  dfirst = *d;
  dsecond = *d;
  dthird = *d;

  dfirst.perm[0] = (d->perm[0] & ~1);
  dfirst.perm[1] = (d->perm[0] & ~1) + 1;
  dfirst.perm[2] = (d->perm[2] & ~1);
  dfirst.perm[3] = (d->perm[2] & ~1) + 1;
  dsecond.perm[0] = (d->perm[1] & ~1);
  dsecond.perm[1] = (d->perm[1] & ~1) + 1;
  dsecond.perm[2] = (d->perm[3] & ~1);
  dsecond.perm[3] = (d->perm[3] & ~1) + 1;
  dthird.perm[0] = (d->perm[0] % 2);
  dthird.perm[1] = (d->perm[1] % 2) + 4;
  dthird.perm[2] = (d->perm[2] % 2) + 2;
  dthird.perm[3] = (d->perm[3] % 2) + 6;

  dfirst.target = gen_reg_rtx (dfirst.vmode);
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  dthird.op0 = dfirst.target;
  dthird.op1 = dsecond.target;
  dthird.one_operand_p = false;

  canonicalize_perm (&dfirst);
  canonicalize_perm (&dsecond);

  ok = expand_vec_perm_1 (&dfirst)
       && expand_vec_perm_1 (&dsecond)
       && expand_vec_perm_1 (&dthird);

  gcc_assert (ok);

  return true;
}
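
/* E.g. for d->perm == { 1, 4, 3, 6 }, dfirst degenerates to the identity
   { 0, 1, 2, 3 }, dsecond to { 4, 5, 6, 7 }, and dthird becomes the
   vshufpd blend { 1, 4, 3, 6 }, picking the odd element of the first
   vector and the even element of the second within each 128-bit lane.  */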
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (!d->one_operand_p);

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_UNIT_SIZE (d->vmode);

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
        e -= nelt;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
          rperm[1-which][i*eltsz + j] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = d->target;
  if (d->vmode != V16QImode)
    op = gen_reg_rtx (V16QImode);
  emit_insn (gen_iorv16qi3 (op, l, h));
  if (op != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
/* Implement arbitrary permutation of one V32QImode and V16QImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || !d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_UNIT_SIZE (d->vmode);

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if the element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
          rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
                                  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = d->target;
  if (d->vmode != V32QImode)
    op = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
  if (op != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode and V16QImode operand
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_UNIT_SIZE (d->vmode);

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
          rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_reg_rtx (V4DImode);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));
  emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
   with two "and" and "pack" or two "shift" and "pack" insns.  We should
   have already failed all two instruction sequences.  */

static bool
expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
{
  rtx op, dop0, dop1, t;
  unsigned i, odd, c, s, nelt = d->nelt;
  bool end_perm = false;
  machine_mode half_mode;
  rtx (*gen_and) (rtx, rtx, rtx);
  rtx (*gen_pack) (rtx, rtx, rtx);
  rtx (*gen_shift) (rtx, rtx, rtx);

  if (d->one_operand_p)
    return false;

  switch (d->vmode)
    {
    case E_V8HImode:
      /* Required for "pack".  */
      if (!TARGET_SSE4_1)
        return false;
      c = 0xffff;
      s = 16;
      half_mode = V4SImode;
      gen_and = gen_andv4si3;
      gen_pack = gen_sse4_1_packusdw;
      gen_shift = gen_lshrv4si3;
      break;
    case E_V16QImode:
      /* No check as all instructions are SSE2.  */
      c = 0xff;
      s = 8;
      half_mode = V8HImode;
      gen_and = gen_andv8hi3;
      gen_pack = gen_sse2_packuswb;
      gen_shift = gen_lshrv8hi3;
      break;
    case E_V16HImode:
      if (!TARGET_AVX2)
        return false;
      c = 0xffff;
      s = 16;
      half_mode = V8SImode;
      gen_and = gen_andv8si3;
      gen_pack = gen_avx2_packusdw;
      gen_shift = gen_lshrv8si3;
      end_perm = true;
      break;
    case E_V32QImode:
      if (!TARGET_AVX2)
        return false;
      c = 0xff;
      s = 8;
      half_mode = V16HImode;
      gen_and = gen_andv16hi3;
      gen_pack = gen_avx2_packuswb;
      gen_shift = gen_lshrv16hi3;
      end_perm = true;
      break;
    default:
      /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
         general shuffles.  */
      return false;
    }

  /* Check that permutation is even or odd.  */
  odd = d->perm[0];
  if (odd > 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  if (d->testing_p)
    return true;

  dop0 = gen_reg_rtx (half_mode);
  dop1 = gen_reg_rtx (half_mode);
  if (odd == 0)
    {
      t = gen_const_vec_duplicate (half_mode, GEN_INT (c));
      t = force_reg (half_mode, t);
      emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
      emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
    }
  else
    {
      emit_insn (gen_shift (dop0,
                            gen_lowpart (half_mode, d->op0),
                            GEN_INT (s)));
      emit_insn (gen_shift (dop1,
                            gen_lowpart (half_mode, d->op1),
                            GEN_INT (s)));
    }
  /* In AVX2 for 256 bit case we need to permute pack result.  */
  if (TARGET_AVX2 && end_perm)
    {
      op = gen_reg_rtx (d->vmode);
      t = gen_reg_rtx (V4DImode);
      emit_insn (gen_pack (op, dop0, dop1));
      emit_insn (gen_avx2_permv4di_1 (t,
                                      gen_lowpart (V4DImode, op),
                                      const0_rtx,
                                      const2_rtx,
                                      const1_rtx,
                                      GEN_INT (3)));
      emit_move_insn (d->target, gen_lowpart (d->vmode, t));
    }
  else
    emit_insn (gen_pack (d->target, dop0, dop1));

  return true;
}
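
/* E.g. extracting the even bytes of two V16QImode operands: both inputs
   are viewed as V8HImode, ANDed with a vector of 0x00ff constants to
   clear the odd bytes, and one packuswb narrows the 16 surviving words
   into the 16 requested bytes.  For the odd bytes the AND is replaced
   by a logical right shift of 8.  */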
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V64QI operands
   with two "shifts", two "truncs" and one "concat" insns for "odd"
   and two "truncs" and one concat insn for "even."
   Have already failed all two instruction sequences.  */

static bool
expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
{
  rtx t1, t2, t3, t4;
  unsigned i, odd, nelt = d->nelt;

  if (!TARGET_AVX512BW
      || d->one_operand_p
      || d->vmode != V64QImode)
    return false;

  /* Check that permutation is even or odd.  */
  odd = d->perm[0];
  if (odd > 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  if (d->testing_p)
    return true;

  if (odd)
    {
      t1 = gen_reg_rtx (V32HImode);
      t2 = gen_reg_rtx (V32HImode);
      emit_insn (gen_lshrv32hi3 (t1,
                                 gen_lowpart (V32HImode, d->op0),
                                 GEN_INT (8)));
      emit_insn (gen_lshrv32hi3 (t2,
                                 gen_lowpart (V32HImode, d->op1),
                                 GEN_INT (8)));
    }
  else
    {
      t1 = gen_lowpart (V32HImode, d->op0);
      t2 = gen_lowpart (V32HImode, d->op1);
    }

  t3 = gen_reg_rtx (V32QImode);
  t4 = gen_reg_rtx (V32QImode);
  emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
  emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
  emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));

  return true;
}
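
/* E.g. the odd halves are obtained by viewing each V64QImode operand as
   V32HImode, shifting every word right by 8 so the odd byte lands in
   the low byte, truncating both with vpmovwb and concatenating the two
   V32QImode results.  */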
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3, t4, t5;

  switch (d->vmode)
    {
    case E_V4DFmode:
      if (d->testing_p)
        break;
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
        t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
        t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case E_V8SFmode:
      {
        int mask = odd ? 0xdd : 0x88;

        if (d->testing_p)
          break;
        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
        emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
                                      GEN_INT (mask)));

        /* Shuffle the lanes around to produce:
           { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
                                            GEN_INT (0x3)));

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
        emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

        /* Shuffle within the 128-bit lanes to produce:
           { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
        emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

        /* Shuffle the lanes around to produce:
           { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
                                            GEN_INT (0x20)));
      }
      break;

    case E_V2DFmode:
    case E_V4SFmode:
    case E_V2DImode:
    case E_V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case E_V8HImode:
      if (TARGET_SSE4_1)
        return expand_vec_perm_even_odd_pack (d);
      else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          if (d->testing_p)
            break;
          /* We need 2*log2(N)-1 operations to achieve odd/even
             with interleave. */
          t1 = gen_reg_rtx (V8HImode);
          t2 = gen_reg_rtx (V8HImode);
          emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
          if (odd)
            t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
          else
            t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
          emit_insn (t3);
        }
      break;

    case E_V16QImode:
      return expand_vec_perm_even_odd_pack (d);

    case E_V16HImode:
    case E_V32QImode:
      return expand_vec_perm_even_odd_pack (d);

    case E_V64QImode:
      return expand_vec_perm_even_odd_trunc (d);

    case E_V4DImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V4DFmode;
          if (d->testing_p)
            d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
          else
            d_copy.target = gen_reg_rtx (V4DFmode);
          d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
          d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
          if (expand_vec_perm_even_odd_1 (&d_copy, odd))
            {
              if (!d->testing_p)
                emit_move_insn (d->target,
                                gen_lowpart (V4DImode, d_copy.target));
              return true;
            }
          return false;
        }

      if (d->testing_p)
        break;

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an vpunpck[lh]qdq will produce the result required.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
        t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case E_V8SImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V8SFmode;
          if (d->testing_p)
            d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
          else
            d_copy.target = gen_reg_rtx (V8SFmode);
          d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
          d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
          if (expand_vec_perm_even_odd_1 (&d_copy, odd))
            {
              if (!d->testing_p)
                emit_move_insn (d->target,
                                gen_lowpart (V8SImode, d_copy.target));
              return true;
            }
          return false;
        }

      if (d->testing_p)
        break;

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);
      t3 = gen_reg_rtx (V4DImode);
      t4 = gen_reg_rtx (V4DImode);
      t5 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into
         { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
         { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now an vpunpck[lh]qdq will produce
         { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
                                           gen_lowpart (V4DImode, t2));
      else
        t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
                                          gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0, dest;
  bool ok;

  switch (vmode)
    {
    case E_V4DFmode:
    case E_V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
         use the vbroadcast instruction.  They expand to two insns
         if the input happens to be in a register.  */
      gcc_unreachable ();

    case E_V2DFmode:
    case E_V2DImode:
    case E_V4SFmode:
    case E_V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case E_V8HImode:
    case E_V16QImode:
      /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
      if (d->testing_p)
        return true;
      do
        {
          rtx (*gen) (rtx, rtx, rtx)
            = vmode == V16QImode ? gen_vec_interleave_lowv16qi
                                 : gen_vec_interleave_lowv8hi;
          if (elt >= nelt2)
            {
              gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
                                       : gen_vec_interleave_highv8hi;
              elt -= nelt2;
            }
          nelt2 /= 2;

          dest = gen_reg_rtx (vmode);
          emit_insn (gen (dest, op0, op0));
          vmode = get_mode_wider_vector (vmode);
          op0 = gen_lowpart (vmode, dest);
        }
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      dest = gen_reg_rtx (V4SImode);
      ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
      gcc_assert (ok);
      if (!d->testing_p)
        emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
      return true;

    case E_V64QImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V8SImode:
    case E_V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
         vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
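
/* E.g. broadcasting byte 5 of a V16QImode register without AVX2:
   punpcklbw duplicates it into word 5 of a V8HImode value, punpckhwd
   then moves it into dword 1 of a V4SImode value, and the final pshufd
   with the { 1, 1, 1, 1 } selector (immediate 0x55) replicates that
   dword across the whole vector.  */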
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutations of two V64QImode operands
   with 2 vperm[it]2w, 2 vpshufb and one vpor instruction.  */

static bool
expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d)
{
  if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
    return false;

  if (d->testing_p)
    return true;

  struct expand_vec_perm_d ds[2];
  rtx rperm[128], vperm, target0, target1;
  unsigned int i, nelt;
  machine_mode vmode;

  nelt = d->nelt;
  vmode = V64QImode;

  for (i = 0; i < 2; i++)
    {
      ds[i] = *d;
      ds[i].vmode = V32HImode;
      ds[i].nelt = 32;
      ds[i].target = gen_reg_rtx (V32HImode);
      ds[i].op0 = gen_lowpart (V32HImode, d->op0);
      ds[i].op1 = gen_lowpart (V32HImode, d->op1);
    }

  /* Prepare permutations such that the first one takes care of
     putting the even bytes into the right positions or one higher
     positions (ds[0]) and the second one takes care of
     putting the odd bytes into the right positions or one below
     (ds[1]).  */

  for (i = 0; i < nelt; i++)
    {
      ds[i & 1].perm[i / 2] = d->perm[i] / 2;
      if (i & 1)
        {
          rperm[i] = constm1_rtx;
          rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
        }
      else
        {
          rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
          rperm[i + 64] = constm1_rtx;
        }
    }

  bool ok = expand_vec_perm_1 (&ds[0]);
  gcc_assert (ok);
  ds[0].target = gen_lowpart (V64QImode, ds[0].target);

  ok = expand_vec_perm_1 (&ds[1]);
  gcc_assert (ok);
  ds[1].target = gen_lowpart (V64QImode, ds[1].target);

  vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
  vperm = force_reg (vmode, vperm);
  target0 = gen_reg_rtx (V64QImode);
  emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));

  vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
  vperm = force_reg (vmode, vperm);
  target1 = gen_reg_rtx (V64QImode);
  emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));

  emit_insn (gen_iorv64qi3 (d->target, target0, target1));
  return true;
}
48647 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
48648 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
48649 all the shorter instruction sequences. */
48652 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
48654 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
48655 unsigned int i, nelt, eltsz;
48659 || d->one_operand_p
48660 || (d->vmode != V32QImode && d->vmode != V16HImode))
48667 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48669 /* Generate 4 permutation masks. If the required element is within
48670 the same lane, it is shuffled in. If the required element from the
48671 other lane, force a zero by setting bit 7 in the permutation mask.
48672 In the other mask the mask has non-negative elements if element
48673 is requested from the other lane, but also moved to the other lane,
48674 so that the result of vpshufb can have the two V2TImode halves
48676 m128 = GEN_INT (-128);
48677 for (i = 0; i < 32; ++i)
48679 rperm[0][i] = m128;
48680 rperm[1][i] = m128;
48681 rperm[2][i] = m128;
48682 rperm[3][i] = m128;
48688 for (i = 0; i < nelt; ++i)
48690 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48691 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48692 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
48694 for (j = 0; j < eltsz; ++j)
48695 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
48696 used[which] = true;
48699 for (i = 0; i < 2; ++i)
48701 if (!used[2 * i + 1])
48706 vperm = gen_rtx_CONST_VECTOR (V32QImode,
48707 gen_rtvec_v (32, rperm[2 * i + 1]));
48708 vperm = force_reg (V32QImode, vperm);
48709 h[i] = gen_reg_rtx (V32QImode);
48710 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48711 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
48714 /* Swap the 128-bit lanes of h[X]. */
48715 for (i = 0; i < 2; ++i)
48717 if (h[i] == NULL_RTX)
48719 op = gen_reg_rtx (V4DImode);
48720 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
48721 const2_rtx, GEN_INT (3), const0_rtx,
48723 h[i] = gen_lowpart (V32QImode, op);
48726 for (i = 0; i < 2; ++i)
48733 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
48734 vperm = force_reg (V32QImode, vperm);
48735 l[i] = gen_reg_rtx (V32QImode);
48736 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48737 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
48740 for (i = 0; i < 2; ++i)
48744 op = gen_reg_rtx (V32QImode);
48745 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
48752 gcc_assert (l[0] && l[1]);
48754 if (d->vmode != V32QImode)
48755 op = gen_reg_rtx (V32QImode);
48756 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
48757 if (op != d->target)
48758 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48762 /* The guts of ix86_vectorize_vec_perm_const. With all of the interface bits
48763 taken care of, perform the expansion in D and return true on success. */
48766 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
48768 /* Try a single instruction expansion. */
48769 if (expand_vec_perm_1 (d))
48772 /* Try sequences of two instructions. */
48774 if (expand_vec_perm_pshuflw_pshufhw (d))
48777 if (expand_vec_perm_palignr (d, false))
48780 if (expand_vec_perm_interleave2 (d))
48783 if (expand_vec_perm_broadcast (d))
48786 if (expand_vec_perm_vpermq_perm_1 (d))
48789 if (expand_vec_perm_vperm2f128 (d))
48792 if (expand_vec_perm_pblendv (d))
48795 /* Try sequences of three instructions. */
48797 if (expand_vec_perm_even_odd_pack (d))
48800 if (expand_vec_perm_2vperm2f128_vshuf (d))
48803 if (expand_vec_perm_pshufb2 (d))
48806 if (expand_vec_perm_interleave3 (d))
48809 if (expand_vec_perm_vperm2f128_vblend (d))
48812 /* Try sequences of four instructions. */
48814 if (expand_vec_perm_even_odd_trunc (d))
48816 if (expand_vec_perm_vpshufb2_vpermq (d))
48819 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
48822 if (expand_vec_perm_vpermt2_vpshub2 (d))
48825 /* ??? Look for narrow permutations whose element orderings would
48826 allow the promotion to a wider mode. */
48828 /* ??? Look for sequences of interleave or a wider permute that place
48829 the data into the correct lanes for a half-vector shuffle like
48830 pshuf[lh]w or vpermilps. */
48832 /* ??? Look for sequences of interleave that produce the desired results.
48833 The combinatorics of punpck[lh] get pretty ugly... */
48835 if (expand_vec_perm_even_odd (d))
48838 /* Even longer sequences. */
48839 if (expand_vec_perm_vpshufb4_vpermq2 (d))
48842 /* See if we can get the same permutation in different vector integer mode. */
48844 struct expand_vec_perm_d nd;
48845 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
48848 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
48855 /* If a permutation only uses one operand, make it clear. Returns true
48856 if the permutation references both operands. */
48859 canonicalize_perm (struct expand_vec_perm_d *d)
48861 int i, which, nelt = d->nelt;
48863 for (i = which = 0; i < nelt; ++i)
48864 which |= (d->perm[i] < nelt ? 1 : 2);
48866 d->one_operand_p = true;
48873 if (!rtx_equal_p (d->op0, d->op1))
48875 d->one_operand_p = false;
48878 /* The elements of PERM do not suggest that only the first operand
48879 is used, but both operands are identical. Allow easier matching
48880 of the permutation by folding the permutation into the single input vector. */
48885 for (i = 0; i < nelt; ++i)
48886 d->perm[i] &= nelt - 1;
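/* E.g. (illustrative): with nelt = 4, identical operands and
   perm = { 4, 5, 2, 7 }, the masking above yields perm = { 0, 1, 2, 3 },
   which the single-input expanders can match directly.  */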
48895 return (which == 3);
48898 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
48901 ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
48902 rtx op1, const vec_perm_indices &sel)
48904 struct expand_vec_perm_d d;
48905 unsigned char perm[MAX_VECT_LEN];
48906 unsigned int i, nelt, which;
48914 gcc_assert (VECTOR_MODE_P (d.vmode));
48915 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
48916 d.testing_p = !target;
48918 gcc_assert (sel.length () == nelt);
48919 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
48921 /* Given sufficient ISA support we can just return true here
48922 for selected vector modes. */
48929 if (!TARGET_AVX512F)
48931 /* All implementable with a single vperm[it]2 insn. */
48936 if (!TARGET_AVX512BW)
48939 /* All implementable with a single vperm[it]2 insn. */
48943 if (!TARGET_AVX512BW)
48946 /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 vpor insn. */
48955 if (d.testing_p && TARGET_AVX512VL)
48956 /* All implementable with a single vperm[it]2 insn. */
48962 if (d.testing_p && TARGET_AVX2)
48963 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
48969 if (d.testing_p && TARGET_AVX2)
48970 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
48977 /* Fall through. */
48982 /* All implementable with a single vpperm insn. */
48983 if (d.testing_p && TARGET_XOP)
48985 /* All implementable with 2 pshufb + 1 ior. */
48986 if (d.testing_p && TARGET_SSSE3)
48993 /* All implementable with shufpd or unpck[lh]pd. */
49001 for (i = which = 0; i < nelt; ++i)
49003 unsigned char e = sel[i];
49004 gcc_assert (e < 2 * nelt);
49007 which |= (e < nelt ? 1 : 2);
49012 /* For all elements from the second vector, fold the elements to the first. */
49014 for (i = 0; i < nelt; ++i)
49017 /* Check whether the mask can be applied to the vector type. */
49018 d.one_operand_p = (which != 3);
49020 /* Implementable with shufps or pshufd. */
49021 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49024 /* Otherwise we have to go through the motions and see if we can
49025 figure out how to generate the requested permutation. */
49026 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49027 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49028 if (!d.one_operand_p)
49029 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49032 bool ret = ix86_expand_vec_perm_const_1 (&d);
49038 two_args = canonicalize_perm (&d);
49040 if (ix86_expand_vec_perm_const_1 (&d))
49043 /* If the selector says both arguments are needed, but the operands are the
49044 same, the above tried to expand with one_operand_p and flattened selector.
49045 If that didn't work, retry without one_operand_p; we succeeded with that during testing. */
49047 if (two_args && d.one_operand_p)
49049 d.one_operand_p = false;
49050 memcpy (d.perm, perm, sizeof (perm));
49051 return ix86_expand_vec_perm_const_1 (&d);
49058 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49060 struct expand_vec_perm_d d;
49066 d.vmode = GET_MODE (targ);
49067 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49068 d.one_operand_p = false;
49069 d.testing_p = false;
49071 for (i = 0; i < nelt; ++i)
49072 d.perm[i] = i * 2 + odd;
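/* E.g. (illustrative): for V4SImode with odd = 1 this builds
   perm = { 1, 3, 5, 7 }, the odd elements of the op0/op1
   concatenation.  */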
49074 /* We'll either be able to implement the permutation directly... */
49075 if (expand_vec_perm_1 (&d))
49078 /* ... or we use the special-case patterns. */
49079 expand_vec_perm_even_odd_1 (&d, odd);
49083 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49085 struct expand_vec_perm_d d;
49086 unsigned i, nelt, base;
49092 d.vmode = GET_MODE (targ);
49093 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49094 d.one_operand_p = false;
49095 d.testing_p = false;
49097 base = high_p ? nelt / 2 : 0;
49098 for (i = 0; i < nelt / 2; ++i)
49100 d.perm[i * 2] = i + base;
49101 d.perm[i * 2 + 1] = i + base + nelt;
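/* E.g. (illustrative): for nelt = 4 and high_p, base = 2 and the loop
   builds perm = { 2, 6, 3, 7 }, interleaving the high halves of the
   two operands.  */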
49104 /* Note that for AVX this isn't one instruction. */
49105 ok = ix86_expand_vec_perm_const_1 (&d);
49110 /* Expand a vector operation CODE for a V*QImode in terms of the
49111 same operation on V*HImode. */
49114 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49116 machine_mode qimode = GET_MODE (dest);
49117 machine_mode himode;
49118 rtx (*gen_il) (rtx, rtx, rtx);
49119 rtx (*gen_ih) (rtx, rtx, rtx);
49120 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49121 struct expand_vec_perm_d d;
49122 bool ok, full_interleave;
49123 bool uns_p = false;
49130 gen_il = gen_vec_interleave_lowv16qi;
49131 gen_ih = gen_vec_interleave_highv16qi;
49134 himode = V16HImode;
49135 gen_il = gen_avx2_interleave_lowv32qi;
49136 gen_ih = gen_avx2_interleave_highv32qi;
49139 himode = V32HImode;
49140 gen_il = gen_avx512bw_interleave_lowv64qi;
49141 gen_ih = gen_avx512bw_interleave_highv64qi;
49144 gcc_unreachable ();
49147 op2_l = op2_h = op2;
49151 /* Unpack data such that we've got a source byte in each low byte of
49152 each word. We don't care what goes into the high byte of each word.
49153 Rather than trying to get zero in there, it is most convenient to let
49154 it be a copy of the low byte. */
49155 op2_l = gen_reg_rtx (qimode);
49156 op2_h = gen_reg_rtx (qimode);
49157 emit_insn (gen_il (op2_l, op2, op2));
49158 emit_insn (gen_ih (op2_h, op2, op2));
49160 op1_l = gen_reg_rtx (qimode);
49161 op1_h = gen_reg_rtx (qimode);
49162 emit_insn (gen_il (op1_l, op1, op1));
49163 emit_insn (gen_ih (op1_h, op1, op1));
49164 full_interleave = qimode == V16QImode;
49172 op1_l = gen_reg_rtx (himode);
49173 op1_h = gen_reg_rtx (himode);
49174 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49175 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49176 full_interleave = true;
49179 gcc_unreachable ();
49182 /* Perform the operation. */
49183 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49185 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49187 gcc_assert (res_l && res_h);
49189 /* Merge the data back into the right place. */
49191 d.op0 = gen_lowpart (qimode, res_l);
49192 d.op1 = gen_lowpart (qimode, res_h);
49194 d.nelt = GET_MODE_NUNITS (qimode);
49195 d.one_operand_p = false;
49196 d.testing_p = false;
49198 if (full_interleave)
49200 /* For SSE2, we used a full interleave, so the desired
49201 results are in the even elements. */
49202 for (i = 0; i < d.nelt; ++i)
49207 /* For AVX, the interleave used above was not cross-lane. So the
49208 extraction is evens but with the second and third quarter swapped.
49209 Happily, that is even one insn shorter than even extraction.
49210 For AVX512BW we have 4 lanes. We extract evens from within a lane,
49211 always first from the first and then from the second source operand,
49212 while the index bits above the low 4 bits remain the same.
49213 Thus, for d.nelt == 32 we want permutation
49214 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
49215 and for d.nelt == 64 we want permutation
49216 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
49217 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */
49218 for (i = 0; i < d.nelt; ++i)
49219 d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
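/* Spot check of the formula (illustrative): for d.nelt == 32 and
   i == 8, ((8 * 2) & 14) == 0, (8 & 8) is set so d.nelt == 32 is
   added, and (8 & ~15) == 0, giving d.perm[8] = 32, matching the
   sequence listed above.  */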
49222 ok = ix86_expand_vec_perm_const_1 (&d);
49225 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49226 gen_rtx_fmt_ee (code, qimode, op1, op2));
49229 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49230 if OP is a CONST_VECTOR with all odd elements equal to their
49231 preceding element. */
49234 const_vector_equal_evenodd_p (rtx op)
49236 machine_mode mode = GET_MODE (op);
49237 int i, nunits = GET_MODE_NUNITS (mode);
49238 if (GET_CODE (op) != CONST_VECTOR
49239 || nunits != CONST_VECTOR_NUNITS (op))
49241 for (i = 0; i < nunits; i += 2)
49242 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49248 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49249 bool uns_p, bool odd_p)
49251 machine_mode mode = GET_MODE (op1);
49252 machine_mode wmode = GET_MODE (dest);
49254 rtx orig_op1 = op1, orig_op2 = op2;
49256 if (!nonimmediate_operand (op1, mode))
49257 op1 = force_reg (mode, op1);
49258 if (!nonimmediate_operand (op2, mode))
49259 op2 = force_reg (mode, op2);
49261 /* We only play even/odd games with vectors of SImode. */
49262 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49264 /* If we're looking for the odd results, shift those members down to
49265 the even slots. For some CPUs this is faster than a PSHUFD. */
49268 /* For XOP use vpmacsdqh, but only for smult, as it is only signed. */
49270 if (TARGET_XOP && mode == V4SImode && !uns_p)
49272 x = force_reg (wmode, CONST0_RTX (wmode));
49273 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49277 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49278 if (!const_vector_equal_evenodd_p (orig_op1))
49279 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49280 x, NULL, 1, OPTAB_DIRECT);
49281 if (!const_vector_equal_evenodd_p (orig_op2))
49282 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49283 x, NULL, 1, OPTAB_DIRECT);
49284 op1 = gen_lowpart (mode, op1);
49285 op2 = gen_lowpart (mode, op2);
49288 if (mode == V16SImode)
49291 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49293 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49295 else if (mode == V8SImode)
49298 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49300 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49303 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49304 else if (TARGET_SSE4_1)
49305 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49308 rtx s1, s2, t0, t1, t2;
49310 /* The easiest way to implement this without PMULDQ is to go through
49311 the motions as if we are performing a full 64-bit multiply, except
49312 that we need to do less shuffling of the elements. */
49314 /* Compute the sign-extension, aka highparts, of the two operands. */
49315 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49316 op1, pc_rtx, pc_rtx);
49317 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49318 op2, pc_rtx, pc_rtx);
49320 /* Multiply LO(A) * HI(B), and vice-versa. */
49321 t1 = gen_reg_rtx (wmode);
49322 t2 = gen_reg_rtx (wmode);
49323 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49324 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49326 /* Multiply LO(A) * LO(B). */
49327 t0 = gen_reg_rtx (wmode);
49328 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49330 /* Combine and shift the highparts into place. */
49331 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49332 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49335 /* Combine high and low parts. */
49336 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
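/* The identity used above, as a scalar sketch (illustrative only;
   the helper name is hypothetical):

     int64_t smul32 (uint32_t a, uint32_t b)
     {
       uint32_t s1 = (int32_t) a < 0 ? 0xffffffffu : 0;
       uint32_t s2 = (int32_t) b < 0 ? 0xffffffffu : 0;
       uint64_t t1 = (uint64_t) s1 * b;
       uint64_t t2 = (uint64_t) s2 * a;
       uint64_t t0 = (uint64_t) a * b;
       return (int64_t) (t0 + ((t1 + t2) << 32));
     }

   Modulo 2^64, (t1 + t2) << 32 contributes the
   -2^32 * ((a < 0) * b + (b < 0) * a) correction that turns the
   unsigned product into the signed one.  */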
49343 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49344 bool uns_p, bool high_p)
49346 machine_mode wmode = GET_MODE (dest);
49347 machine_mode mode = GET_MODE (op1);
49348 rtx t1, t2, t3, t4, mask;
49353 t1 = gen_reg_rtx (mode);
49354 t2 = gen_reg_rtx (mode);
49355 if (TARGET_XOP && !uns_p)
49357 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49358 shuffle the elements once so that all elements are in the right
49359 place for immediate use: { A C B D }. */
49360 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49361 const1_rtx, GEN_INT (3)));
49362 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49363 const1_rtx, GEN_INT (3)));
49367 /* Put the elements into place for the multiply. */
49368 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49369 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49372 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49376 /* Shuffle the elements between the lanes. After this we
49377 have { A B E F | C D G H } for each operand. */
49378 t1 = gen_reg_rtx (V4DImode);
49379 t2 = gen_reg_rtx (V4DImode);
49380 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49381 const0_rtx, const2_rtx,
49382 const1_rtx, GEN_INT (3)));
49383 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49384 const0_rtx, const2_rtx,
49385 const1_rtx, GEN_INT (3)));
49387 /* Shuffle the elements within the lanes. After this we
49388 have { A A B B | C C D D } or { E E F F | G G H H }. */
49389 t3 = gen_reg_rtx (V8SImode);
49390 t4 = gen_reg_rtx (V8SImode);
49391 mask = GEN_INT (high_p
49392 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49393 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49394 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49395 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49397 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49402 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49403 uns_p, OPTAB_DIRECT);
49404 t2 = expand_binop (mode,
49405 uns_p ? umul_highpart_optab : smul_highpart_optab,
49406 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49407 gcc_assert (t1 && t2);
49409 t3 = gen_reg_rtx (mode);
49410 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49411 emit_move_insn (dest, gen_lowpart (wmode, t3));
49419 t1 = gen_reg_rtx (wmode);
49420 t2 = gen_reg_rtx (wmode);
49421 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49422 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49424 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
49428 gcc_unreachable ();
49433 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49435 rtx res_1, res_2, res_3, res_4;
49437 res_1 = gen_reg_rtx (V4SImode);
49438 res_2 = gen_reg_rtx (V4SImode);
49439 res_3 = gen_reg_rtx (V2DImode);
49440 res_4 = gen_reg_rtx (V2DImode);
49441 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49442 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49444 /* Move the results in element 2 down to element 1; we don't care
49445 what goes in elements 2 and 3. Then we can merge the parts
49446 back together with an interleave.
49448 Note that two other sequences were tried:
49449 (1) Use interleaves at the start instead of psrldq, which allows
49450 us to use a single shufps to merge things back at the end.
49451 (2) Use shufps here to combine the two vectors, then pshufd to
49452 put the elements in the correct order.
49453 In both cases the cost of the reformatting stall was too high
49454 and the overall sequence slower. */
49456 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49457 const0_rtx, const2_rtx,
49458 const0_rtx, const0_rtx));
49459 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49460 const0_rtx, const2_rtx,
49461 const0_rtx, const0_rtx));
49462 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49464 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49468 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49470 machine_mode mode = GET_MODE (op0);
49471 rtx t1, t2, t3, t4, t5, t6;
49473 if (TARGET_AVX512DQ && mode == V8DImode)
49474 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49475 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49476 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49477 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49478 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49479 else if (TARGET_XOP && mode == V2DImode)
49481 /* op1: A,B,C,D, op2: E,F,G,H */
49482 op1 = gen_lowpart (V4SImode, op1);
49483 op2 = gen_lowpart (V4SImode, op2);
49485 t1 = gen_reg_rtx (V4SImode);
49486 t2 = gen_reg_rtx (V4SImode);
49487 t3 = gen_reg_rtx (V2DImode);
49488 t4 = gen_reg_rtx (V2DImode);
49491 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49497 /* t2: (B*E),(A*F),(D*G),(C*H) */
49498 emit_insn (gen_mulv4si3 (t2, t1, op2));
49500 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49501 emit_insn (gen_xop_phadddq (t3, t2));
49503 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49504 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49506 /* Multiply the lower parts and add everything together. */
49507 t5 = gen_reg_rtx (V2DImode);
49508 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49509 gen_lowpart (V4SImode, op1),
49510 gen_lowpart (V4SImode, op2)));
49511 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49516 machine_mode nmode;
49517 rtx (*umul) (rtx, rtx, rtx);
49519 if (mode == V2DImode)
49521 umul = gen_vec_widen_umult_even_v4si;
49524 else if (mode == V4DImode)
49526 umul = gen_vec_widen_umult_even_v8si;
49529 else if (mode == V8DImode)
49531 umul = gen_vec_widen_umult_even_v16si;
49535 gcc_unreachable ();
49538 /* Multiply low parts. */
49539 t1 = gen_reg_rtx (mode);
49540 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49542 /* Shift input vectors right 32 bits so we can multiply high parts. */
49544 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49545 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49547 /* Multiply high parts by low parts. */
49548 t4 = gen_reg_rtx (mode);
49549 t5 = gen_reg_rtx (mode);
49550 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49551 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49553 /* Combine and shift the highparts back. */
49554 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49555 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49557 /* Combine high and low parts. */
49558 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49561 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49562 gen_rtx_MULT (mode, op1, op2));
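/* A scalar sketch of the decomposition used above (illustrative only;
   the helper name is hypothetical):

     static uint64_t
     mul64_via_32 (uint64_t a, uint64_t b)
     {
       uint64_t lo = (uint64_t) (uint32_t) a * (uint32_t) b;
       uint64_t hi = ((a >> 32) * (uint32_t) b
                      + (b >> 32) * (uint32_t) a) << 32;
       return lo + hi;
     }

   The vector code performs the same computation per element: one
   widening low*low multiply plus the two cross products shifted back
   into the high half; the high*high term is dropped because it only
   affects bits at or above 2^64.  */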
49565 /* Return 1 if the control transfer instruction INSN
49566 should be encoded with a notrack prefix. */
49569 ix86_notrack_prefixed_insn_p (rtx insn)
49571 if (!insn || !((flag_cf_protection & CF_BRANCH)))
49576 rtx call = get_call_rtx_from (insn);
49577 gcc_assert (call != NULL_RTX);
49578 rtx addr = XEXP (call, 0);
49580 /* Do not emit 'notrack' if it's not an indirect call. */
49582 && GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
49585 return find_reg_note (insn, REG_CALL_NOCF_CHECK, 0);
49588 if (JUMP_P (insn) && !flag_cet_switch)
49590 rtx target = JUMP_LABEL (insn);
49591 if (target == NULL_RTX || ANY_RETURN_P (target))
49594 /* Check that the jump is a switch-table jump. */
49595 rtx_insn *label = as_a<rtx_insn *> (target);
49596 rtx_insn *table = next_insn (label);
49597 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
49605 /* Calculate integer abs() using only SSE2 instructions. */
49608 ix86_expand_sse2_abs (rtx target, rtx input)
49610 machine_mode mode = GET_MODE (target);
49617 /* For 64-bit signed integer X, with SSE4.2 use
49618 pxor t0, t0; pcmpgtq X, t0; pxor t0, X; psubq t0, X.
49619 Otherwise handle it similarly to V4SImode, except use 64 as W instead of
49620 32, use a logical instead of an arithmetic right shift (the latter is
49621 unimplemented for 64-bit elements), and subtract. */
49624 tmp0 = gen_reg_rtx (mode);
49625 tmp1 = gen_reg_rtx (mode);
49626 emit_move_insn (tmp1, CONST0_RTX (mode));
49627 if (mode == E_V2DImode)
49628 emit_insn (gen_sse4_2_gtv2di3 (tmp0, tmp1, input));
49630 emit_insn (gen_avx2_gtv4di3 (tmp0, tmp1, input));
49634 tmp0 = expand_simple_binop (mode, LSHIFTRT, input,
49635 GEN_INT (GET_MODE_UNIT_BITSIZE (mode)
49636 - 1), NULL, 0, OPTAB_DIRECT);
49637 tmp0 = expand_simple_unop (mode, NEG, tmp0, NULL, false);
49640 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49641 NULL, 0, OPTAB_DIRECT);
49642 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49643 target, 0, OPTAB_DIRECT);
49647 /* For 32-bit signed integer X, the best way to calculate the absolute
49648 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
49649 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49650 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
49651 NULL, 0, OPTAB_DIRECT);
49652 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49653 NULL, 0, OPTAB_DIRECT);
49654 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49655 target, 0, OPTAB_DIRECT);
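/* Worked instance (illustrative): X = -5, W = 32.  X >> 31 = -1,
   (-1 ^ -5) = 4 and 4 - (-1) = 5.  For non-negative X the shift
   yields 0 and both the XOR and the subtraction are no-ops.  */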
49659 /* For 16-bit signed integer X, the best way to calculate the absolute
49660 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
49661 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49663 x = expand_simple_binop (mode, SMAX, tmp0, input,
49664 target, 0, OPTAB_DIRECT);
49668 /* For 8-bit signed integer X, the best way to calculate the absolute
49669 value of X is min ((unsigned char) X, (unsigned char) (-X)),
49670 as SSE2 provides the PMINUB insn. */
49671 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49673 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
49674 target, 0, OPTAB_DIRECT);
49678 gcc_unreachable ();
49682 emit_move_insn (target, x);
49685 /* Expand an extract from a vector register through pextr insn.
49686 Return true if successful. */
49689 ix86_expand_pextr (rtx *operands)
49691 rtx dst = operands[0];
49692 rtx src = operands[1];
49694 unsigned int size = INTVAL (operands[2]);
49695 unsigned int pos = INTVAL (operands[3]);
49697 if (SUBREG_P (dst))
49699 /* Reject non-lowpart subregs. */
49700 if (SUBREG_BYTE (dst) > 0)
49702 dst = SUBREG_REG (dst);
49705 if (SUBREG_P (src))
49707 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
49708 src = SUBREG_REG (src);
49711 switch (GET_MODE (src))
49720 machine_mode srcmode, dstmode;
49723 if (!int_mode_for_size (size, 0).exists (&dstmode))
49729 if (!TARGET_SSE4_1)
49731 srcmode = V16QImode;
49737 srcmode = V8HImode;
49741 if (!TARGET_SSE4_1)
49743 srcmode = V4SImode;
49747 gcc_assert (TARGET_64BIT);
49748 if (!TARGET_SSE4_1)
49750 srcmode = V2DImode;
49757 /* Reject extractions from misaligned positions. */
49758 if (pos & (size - 1))
49761 if (GET_MODE (dst) == dstmode)
49764 d = gen_reg_rtx (dstmode);
49766 /* Construct insn pattern. */
49767 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
49768 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
49770 /* Let the rtl optimizers know about the zero extension performed. */
49771 if (dstmode == QImode || dstmode == HImode)
49773 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
49774 d = gen_lowpart (SImode, d);
49777 emit_insn (gen_rtx_SET (d, pat));
49780 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
49789 /* Expand an insert into a vector register through pinsr insn.
49790 Return true if successful. */
49793 ix86_expand_pinsr (rtx *operands)
49795 rtx dst = operands[0];
49796 rtx src = operands[3];
49798 unsigned int size = INTVAL (operands[1]);
49799 unsigned int pos = INTVAL (operands[2]);
49801 if (SUBREG_P (dst))
49803 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
49804 dst = SUBREG_REG (dst);
49807 switch (GET_MODE (dst))
49816 machine_mode srcmode, dstmode;
49817 rtx (*pinsr)(rtx, rtx, rtx, rtx);
49820 if (!int_mode_for_size (size, 0).exists (&srcmode))
49826 if (!TARGET_SSE4_1)
49828 dstmode = V16QImode;
49829 pinsr = gen_sse4_1_pinsrb;
49835 dstmode = V8HImode;
49836 pinsr = gen_sse2_pinsrw;
49840 if (!TARGET_SSE4_1)
49842 dstmode = V4SImode;
49843 pinsr = gen_sse4_1_pinsrd;
49847 gcc_assert (TARGET_64BIT);
49848 if (!TARGET_SSE4_1)
49850 dstmode = V2DImode;
49851 pinsr = gen_sse4_1_pinsrq;
49858 /* Reject insertions to misaligned positions. */
49859 if (pos & (size - 1))
49862 if (SUBREG_P (src))
49864 unsigned int srcpos = SUBREG_BYTE (src);
49870 extr_ops[0] = gen_reg_rtx (srcmode);
49871 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
49872 extr_ops[2] = GEN_INT (size);
49873 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
49875 if (!ix86_expand_pextr (extr_ops))
49881 src = gen_lowpart (srcmode, SUBREG_REG (src));
49884 if (GET_MODE (dst) == dstmode)
49887 d = gen_reg_rtx (dstmode);
49889 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
49890 gen_lowpart (srcmode, src),
49891 GEN_INT (1 << (pos / size))));
49893 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
49902 /* This function returns the calling-ABI-specific va_list type node.
49903 It returns the FNDECL-specific va_list type. */
49906 ix86_fn_abi_va_list (tree fndecl)
49909 return va_list_type_node;
49910 gcc_assert (fndecl != NULL_TREE);
49912 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
49913 return ms_va_list_type_node;
49915 return sysv_va_list_type_node;
49918 /* Returns the canonical va_list type specified by TYPE. If there
49919 is no valid TYPE provided, it returns NULL_TREE. */
49922 ix86_canonical_va_list_type (tree type)
49926 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
49927 return ms_va_list_type_node;
49929 if ((TREE_CODE (type) == ARRAY_TYPE
49930 && integer_zerop (array_type_nelts (type)))
49931 || POINTER_TYPE_P (type))
49933 tree elem_type = TREE_TYPE (type);
49934 if (TREE_CODE (elem_type) == RECORD_TYPE
49935 && lookup_attribute ("sysv_abi va_list",
49936 TYPE_ATTRIBUTES (elem_type)))
49937 return sysv_va_list_type_node;
49943 return std_canonical_va_list_type (type);
49946 /* Iterate through the target-specific builtin types for va_list.
49947 IDX denotes the iterator, *PTREE is set to the result type of
49948 the va_list builtin, and *PNAME to its internal type.
49949 Returns zero if there is no element for this index, otherwise
49950 IDX should be increased upon the next call.
49951 Note, do not iterate a base builtin's name like __builtin_va_list.
49952 Used from c_common_nodes_and_builtins. */
49955 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
49965 *ptree = ms_va_list_type_node;
49966 *pname = "__builtin_ms_va_list";
49970 *ptree = sysv_va_list_type_node;
49971 *pname = "__builtin_sysv_va_list";
49979 #undef TARGET_SCHED_DISPATCH
49980 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
49981 #undef TARGET_SCHED_DISPATCH_DO
49982 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
49983 #undef TARGET_SCHED_REASSOCIATION_WIDTH
49984 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
49985 #undef TARGET_SCHED_REORDER
49986 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
49987 #undef TARGET_SCHED_ADJUST_PRIORITY
49988 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
49989 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
49990 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
49991 ix86_dependencies_evaluation_hook
49994 /* Implementation of the reassociation_width target hook, used by the
49995 reassoc phase to identify the parallelism level in a reassociated
49996 tree. The statement's tree_code is passed in OP. The arguments'
49997 type is passed in MODE. */
50000 ix86_reassociation_width (unsigned int op, machine_mode mode)
50004 if (VECTOR_MODE_P (mode))
50007 if (INTEGRAL_MODE_P (mode))
50008 width = ix86_cost->reassoc_vec_int;
50009 else if (FLOAT_MODE_P (mode))
50010 width = ix86_cost->reassoc_vec_fp;
50015 /* Integer vector instructions execute in the FP unit and
50016 can execute 3 additions and one multiplication per cycle. */
50017 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2)
50018 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
50021 /* Account for targets that split wide vectors into multiple parts. */
50022 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
50023 div = GET_MODE_BITSIZE (mode) / 128;
50024 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
50025 div = GET_MODE_BITSIZE (mode) / 64;
50026 width = (width + div - 1) / div;
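/* E.g. (illustrative): a 256-bit vector mode on an AVX128_OPTIMAL
   target gives div = 256 / 128 = 2, halving the width (rounded up)
   since each such operation occupies two 128-bit internal ops.  */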
50029 else if (INTEGRAL_MODE_P (mode))
50030 width = ix86_cost->reassoc_int;
50031 else if (FLOAT_MODE_P (mode))
50032 width = ix86_cost->reassoc_fp;
50034 /* Avoid using too many registers in 32bit mode. */
50035 if (!TARGET_64BIT && width > 2)
50040 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
50041 place emms and femms instructions. */
50043 static machine_mode
50044 ix86_preferred_simd_mode (scalar_mode mode)
50052 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
50054 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50060 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
50062 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50068 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50070 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50076 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50078 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50084 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50086 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50092 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50094 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50096 else if (TARGET_SSE2)
50105 /* All CPUs prefer to avoid cross-lane operations, so perform reductions
50106 of the upper against the lower halves down to SSE register size. */
50108 static machine_mode
50109 ix86_split_reduction (machine_mode mode)
50111 /* Reduce lowpart against highpart until we reach SSE reg width to
50112 avoid cross-lane operations. */
50138 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
50139 vectors. If AVX512F is enabled then try vectorizing with 512bit,
50140 256bit and 128bit vectors. */
50143 ix86_autovectorize_vector_sizes (vector_sizes *sizes)
50145 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50147 sizes->safe_push (64);
50148 sizes->safe_push (32);
50149 sizes->safe_push (16);
50151 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50153 sizes->safe_push (32);
50154 sizes->safe_push (16);
50158 /* Implementation of targetm.vectorize.get_mask_mode. */
50160 static opt_machine_mode
50161 ix86_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size)
50163 unsigned elem_size = vector_size / nunits;
50165 /* Scalar mask case. */
50166 if ((TARGET_AVX512F && vector_size == 64)
50167 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
50169 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
50170 return smallest_int_mode_for_size (nunits);
50173 scalar_int_mode elem_mode
50174 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
50176 gcc_assert (elem_size * nunits == vector_size);
50178 return mode_for_vector (elem_mode, nunits);
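/* E.g. (illustrative): with AVX512F, vector_size == 64 and nunits == 16
   (SImode elements) yield the scalar mask mode HImode, one mask bit per
   element; for a 16-byte vector without AVX512VL the same element size
   instead yields an ordinary V4SImode vector mask.  */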
50183 /* Return the class of registers which could be used for a pseudo of MODE
50184 and of class RCLASS for spilling instead of memory. Return NO_REGS
50185 if it is not possible or not profitable. */
50187 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
50190 ix86_spill_class (reg_class_t rclass, machine_mode mode)
50192 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
50194 && TARGET_INTER_UNIT_MOVES_TO_VEC
50195 && TARGET_INTER_UNIT_MOVES_FROM_VEC
50196 && (mode == SImode || (TARGET_64BIT && mode == DImode))
50197 && INTEGER_CLASS_P (rclass))
50198 return ALL_SSE_REGS;
50202 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
50203 but returns a lower bound. */
50205 static unsigned int
50206 ix86_max_noce_ifcvt_seq_cost (edge e)
50208 bool predictable_p = predictable_edge_p (e);
50210 enum compiler_param param
50212 ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
50213 : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST);
50215 /* If we have a parameter set, use that, otherwise take a guess using BRANCH_COST. */
50217 if (global_options_set.x_param_values[param])
50218 return PARAM_VALUE (param);
50220 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
50223 /* Return true if SEQ is a good candidate as a replacement for the
50224 if-convertible sequence described in IF_INFO. */
50227 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
50229 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
50232 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
50233 Maybe we should allow even more conditional moves as long as they
50234 are used far enough not to stall the CPU, or also consider
50235 IF_INFO->TEST_BB succ edge probabilities. */
50236 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
50238 rtx set = single_set (insn);
50241 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
50243 rtx src = SET_SRC (set);
50244 machine_mode mode = GET_MODE (src);
50245 if (GET_MODE_CLASS (mode) != MODE_INT
50246 && GET_MODE_CLASS (mode) != MODE_FLOAT)
50248 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
50249 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
50251 /* insn is CMOV or FCMOV. */
50252 if (++cmov_cnt > 1)
50256 return default_noce_conversion_profitable_p (seq, if_info);
50259 /* Implement targetm.vectorize.init_cost. */
50262 ix86_init_cost (struct loop *)
50264 unsigned *cost = XNEWVEC (unsigned, 3);
50265 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
50269 /* Implement targetm.vectorize.add_stmt_cost. */
50272 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
50273 struct _stmt_vec_info *stmt_info, int misalign,
50274 enum vect_cost_model_location where)
50276 unsigned *cost = (unsigned *) data;
50277 unsigned retval = 0;
50279 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
50281 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
50282 int stmt_cost = -1;
50285 machine_mode mode = scalar_p ? SImode : TImode;
50287 if (vectype != NULL)
50289 fp = FLOAT_TYPE_P (vectype);
50290 mode = TYPE_MODE (vectype);
50292 mode = TYPE_MODE (TREE_TYPE (vectype));
50295 if ((kind == vector_stmt || kind == scalar_stmt)
50297 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
50299 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
50300 /*machine_mode inner_mode = mode;
50301 if (VECTOR_MODE_P (mode))
50302 inner_mode = GET_MODE_INNER (mode);*/
50307 case POINTER_PLUS_EXPR:
50309 if (kind == scalar_stmt)
50311 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
50312 stmt_cost = ix86_cost->addss;
50313 else if (X87_FLOAT_MODE_P (mode))
50314 stmt_cost = ix86_cost->fadd;
50316 stmt_cost = ix86_cost->add;
50319 stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
50320 : ix86_cost->sse_op);
50324 case WIDEN_MULT_EXPR:
50325 case MULT_HIGHPART_EXPR:
50326 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
50329 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
50330 stmt_cost = ix86_cost->sse_op;
50331 else if (X87_FLOAT_MODE_P (mode))
50332 stmt_cost = ix86_cost->fchs;
50333 else if (VECTOR_MODE_P (mode))
50334 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
50336 stmt_cost = ix86_cost->add;
50338 case TRUNC_DIV_EXPR:
50339 case CEIL_DIV_EXPR:
50340 case FLOOR_DIV_EXPR:
50341 case ROUND_DIV_EXPR:
50342 case TRUNC_MOD_EXPR:
50343 case CEIL_MOD_EXPR:
50344 case FLOOR_MOD_EXPR:
50346 case ROUND_MOD_EXPR:
50347 case EXACT_DIV_EXPR:
50348 stmt_cost = ix86_division_cost (ix86_cost, mode);
50356 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
50357 stmt_cost = ix86_shift_rotate_cost
50359 TREE_CODE (op2) == INTEGER_CST,
50360 cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
50361 true, false, false, NULL, NULL);
50365 /* Only sign-conversions are free. */
50366 if (tree_nop_conversion_p
50367 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
50368 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
50380 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
50381 stmt_cost = ix86_cost->sse_op;
50382 else if (VECTOR_MODE_P (mode))
50383 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
50385 stmt_cost = ix86_cost->add;
50393 if ((kind == vector_stmt || kind == scalar_stmt)
50396 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
50400 stmt_cost = ix86_vec_cost (mode,
50401 mode == SFmode ? ix86_cost->fmass
50402 : ix86_cost->fmasd);
50408 /* If we do elementwise loads into a vector then we are bound by
50409 latency and execution resources for the many scalar loads
50410 (AGU and load ports). Try to account for this by scaling the
50411 construction cost by the number of elements involved. */
50412 if (kind == vec_construct
50414 && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
50415 && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
50416 && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST)
50418 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
50419 stmt_cost *= TYPE_VECTOR_SUBPARTS (vectype);
50421 if (stmt_cost == -1)
50422 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
50424 /* Penalize DFmode vector operations for Bonnell. */
50425 if (TARGET_BONNELL && kind == vector_stmt
50426 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
50427 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
50429 /* Statements in an inner loop relative to the loop being
50430 vectorized are weighted more heavily. The value here is
50431 arbitrary and could potentially be improved with analysis. */
50432 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
50433 count *= 50; /* FIXME. */
50435 retval = (unsigned) (count * stmt_cost);
50437 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
50438 for Silvermont, as it has an out-of-order integer pipeline and can execute
50439 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
50440 if ((TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
50441 || TARGET_TREMONT || TARGET_INTEL) && stmt_info && stmt_info->stmt)
50443 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
50444 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
50445 retval = (retval * 17) / 10;
50448 cost[where] += retval;
50453 /* Implement targetm.vectorize.finish_cost. */
50456 ix86_finish_cost (void *data, unsigned *prologue_cost,
50457 unsigned *body_cost, unsigned *epilogue_cost)
50459 unsigned *cost = (unsigned *) data;
50460 *prologue_cost = cost[vect_prologue];
50461 *body_cost = cost[vect_body];
50462 *epilogue_cost = cost[vect_epilogue];
50465 /* Implement targetm.vectorize.destroy_cost_data. */
50468 ix86_destroy_cost_data (void *data)
50473 /* Validate target specific memory model bits in VAL. */
50475 static unsigned HOST_WIDE_INT
50476 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
50478 enum memmodel model = memmodel_from_int (val);
50481 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
50483 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
50485 warning (OPT_Winvalid_memory_model,
50486 "unknown architecture specific memory model");
50487 return MEMMODEL_SEQ_CST;
50489 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
50490 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
50492 warning (OPT_Winvalid_memory_model,
50493 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
50494 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
50496 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
50498 warning (OPT_Winvalid_memory_model,
50499 "HLE_RELEASE not used with RELEASE or stronger memory model");
50500 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
50505 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
50506 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
50507 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
50508 or the number of vecsize_mangle variants that should be emitted. */
50511 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
50512 struct cgraph_simd_clone *clonei,
50513 tree base_type, int num)
50517 if (clonei->simdlen
50518 && (clonei->simdlen < 2
50519 || clonei->simdlen > 1024
50520 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
50522 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50523 "unsupported simdlen %d", clonei->simdlen);
50527 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
50528 if (TREE_CODE (ret_type) != VOID_TYPE)
50529 switch (TYPE_MODE (ret_type))
50537 /* case E_SCmode: */
50538 /* case E_DCmode: */
50541 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50542 "unsupported return type %qT for simd", ret_type);
50549 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
50550 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
50551 switch (TYPE_MODE (TREE_TYPE (t)))
50559 /* case E_SCmode: */
50560 /* case E_DCmode: */
50563 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50564 "unsupported argument type %qT for simd", TREE_TYPE (t));
50568 if (!TREE_PUBLIC (node->decl))
50570 /* If the function isn't exported, we can pick up just one ISA for the clones. */
50572 if (TARGET_AVX512F)
50573 clonei->vecsize_mangle = 'e';
50574 else if (TARGET_AVX2)
50575 clonei->vecsize_mangle = 'd';
50576 else if (TARGET_AVX)
50577 clonei->vecsize_mangle = 'c';
50579 clonei->vecsize_mangle = 'b';
50584 clonei->vecsize_mangle = "bcde"[num];
50587 clonei->mask_mode = VOIDmode;
50588 switch (clonei->vecsize_mangle)
50591 clonei->vecsize_int = 128;
50592 clonei->vecsize_float = 128;
50595 clonei->vecsize_int = 128;
50596 clonei->vecsize_float = 256;
50599 clonei->vecsize_int = 256;
50600 clonei->vecsize_float = 256;
50603 clonei->vecsize_int = 512;
50604 clonei->vecsize_float = 512;
50605 if (TYPE_MODE (base_type) == QImode)
50606 clonei->mask_mode = DImode;
50608 clonei->mask_mode = SImode;
50611 if (clonei->simdlen == 0)
50613 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
50614 clonei->simdlen = clonei->vecsize_int;
50616 clonei->simdlen = clonei->vecsize_float;
50617 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
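/* E.g. (illustrative): for the 'e' (AVX512) variant, a double base
   type gives simdlen = 512 / 64 = 8, while an int base type gives
   simdlen = 512 / 32 = 16.  */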
50619 else if (clonei->simdlen > 16)
50621 /* For compatibility with ICC, use the same upper bounds
50622 for simdlen. In particular, for CTYPE below, use the return type,
50623 unless the function returns void, in which case use the characteristic
50624 type. If it is possible for the given SIMDLEN to pass a CTYPE value
50625 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
50626 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
50627 emit corresponding clone. */
50628 tree ctype = ret_type;
50629 if (TREE_CODE (ret_type) == VOID_TYPE)
50631 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
50632 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
50633 cnt /= clonei->vecsize_int;
50635 cnt /= clonei->vecsize_float;
50636 if (cnt > (TARGET_64BIT ? 16 : 8))
50638 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50639 "unsupported simdlen %d", clonei->simdlen);
50646 /* Add target attribute to SIMD clone NODE if needed. */
50649 ix86_simd_clone_adjust (struct cgraph_node *node)
50651 const char *str = NULL;
50652 gcc_assert (node->decl == cfun->decl);
50653 switch (node->simdclone->vecsize_mangle)
50668 if (!TARGET_AVX512F)
50672 gcc_unreachable ();
50677 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
50678 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
50681 ix86_reset_previous_fndecl ();
50682 ix86_set_current_function (node->decl);
50685 /* If SIMD clone NODE can't be used in a vectorized loop
50686 in the current function, return -1; otherwise return the badness of using it
50687 (0 if it is most desirable from the vecsize_mangle point of view, 1
50688 slightly less desirable, etc.). */
50691 ix86_simd_clone_usable (struct cgraph_node *node)
50693 switch (node->simdclone->vecsize_mangle)
50700 return TARGET_AVX2 ? 2 : 1;
50704 return TARGET_AVX2 ? 1 : 0;
50710 if (!TARGET_AVX512F)
50714 gcc_unreachable ();
50718 /* This function adjusts the unroll factor based on
50719 the hardware capabilities. For example, bdver3 has
50720 a loop buffer which makes unrolling of smaller
50721 loops less important. This function decides the
50722 unroll factor using the number of memory references
50723 (the value 32 is used) as a heuristic. */
50726 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
50731 unsigned mem_count = 0;
50733 if (!TARGET_ADJUST_UNROLL)
50736 /* Count the number of memory references within the loop body.
50737 This value determines the unrolling factor for the bdver3 and bdver4 architectures. */
50739 subrtx_iterator::array_type array;
50740 bbs = get_loop_body (loop);
50741 for (i = 0; i < loop->num_nodes; i++)
50742 FOR_BB_INSNS (bbs[i], insn)
50743 if (NONDEBUG_INSN_P (insn))
50744 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
50745 if (const_rtx x = *iter)
50748 machine_mode mode = GET_MODE (x);
50749 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
50757 if (mem_count && mem_count <= 32)
50758 return MIN (nunroll, 32 / mem_count);
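/* E.g. (illustrative): a loop body with mem_count == 5 caps the factor
   at 32 / 5 = 6, so the loop is unrolled at most 6 times no matter how
   large NUNROLL was.  */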
50764 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
50767 ix86_float_exceptions_rounding_supported_p (void)
50769 /* For x87 floating point with standard excess precision handling,
50770 there is no adddf3 pattern (since x87 floating point only has
50771 XFmode operations), so the default hook implementation gets this wrong. */
50773 return TARGET_80387 || TARGET_SSE_MATH;
50776 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
50779 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
50781 if (!TARGET_80387 && !TARGET_SSE_MATH)
50783 tree exceptions_var = create_tmp_var_raw (integer_type_node);
50786 tree fenv_index_type = build_index_type (size_int (6));
50787 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
50788 tree fenv_var = create_tmp_var_raw (fenv_type);
50789 TREE_ADDRESSABLE (fenv_var) = 1;
50790 tree fenv_ptr = build_pointer_type (fenv_type);
50791 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
50792 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
50793 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
50794 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
50795 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
50796 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
50797 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
50798 tree hold_fnclex = build_call_expr (fnclex, 0);
50799 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
50800 NULL_TREE, NULL_TREE);
50801 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
50803 *clear = build_call_expr (fnclex, 0);
50804 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
50805 tree fnstsw_call = build_call_expr (fnstsw, 0);
50806 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
50807 sw_var, fnstsw_call);
50808 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
50809 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
50810 exceptions_var, exceptions_x87);
50811 *update = build2 (COMPOUND_EXPR, integer_type_node,
50812 sw_mod, update_mod);
50813 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
50814 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
50816 if (TARGET_SSE_MATH)
50818 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
50819 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
50820 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
50821 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
50822 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
50823 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
50824 mxcsr_orig_var, stmxcsr_hold_call);
50825 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
50827 build_int_cst (unsigned_type_node, 0x1f80));
50828 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
50829 build_int_cst (unsigned_type_node, 0xffffffc0));
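/* Illustrative bit arithmetic: OR-ing in 0x1f80 sets the six MXCSR
   exception mask bits (bits 7-12) so that no exception traps, and
   AND-ing with 0xffffffc0 clears the six sticky exception flag bits
   (bits 0-5), giving the held environment a clean initial state.  */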
50830 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
50831 mxcsr_mod_var, hold_mod_val);
50832 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
50833 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
50834 hold_assign_orig, hold_assign_mod);
50835 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
50836 ldmxcsr_hold_call);
50838 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
50841 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
50843 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
50844 ldmxcsr_clear_call);
50846 *clear = ldmxcsr_clear_call;
50847 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
50848 tree exceptions_sse = fold_convert (integer_type_node,
50849 stxmcsr_update_call);
50852 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
50853 exceptions_var, exceptions_sse);
50854 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
50855 exceptions_var, exceptions_mod);
50856 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
50857 exceptions_assign);
50860 *update = build2 (MODIFY_EXPR, integer_type_node,
50861 exceptions_var, exceptions_sse);
50862 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
50863 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
50864 ldmxcsr_update_call);
50866 tree atomic_feraiseexcept
50867 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
50868 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
50869 1, exceptions_var);
50870 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
50871 atomic_feraiseexcept_call);
50874 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
50875 /* For i386, a common symbol is local only for non-PIE binaries. For
50876 x86-64, a common symbol is local only for non-PIE binaries or if the
50877 linker supports copy relocations in PIE binaries. */
50880 ix86_binds_local_p (const_tree exp)
50882 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
50885 && HAVE_LD_PIE_COPYRELOC != 0)));
50889 /* If MEM is in the form of [base+offset], extract the two parts
50890 of the address into BASE and OFFSET, otherwise return false. */
50893 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
50897 gcc_assert (MEM_P (mem));
50899 addr = XEXP (mem, 0);
50901 if (GET_CODE (addr) == CONST)
50902 addr = XEXP (addr, 0);
50904 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
50907 *offset = const0_rtx;
50911 if (GET_CODE (addr) == PLUS
50912 && (REG_P (XEXP (addr, 0))
50913 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
50914 && CONST_INT_P (XEXP (addr, 1)))
50916 *base = XEXP (addr, 0);
50917 *offset = XEXP (addr, 1);
50924 /* Given OPERANDS of consecutive load/store, check if we can merge
50925 them into move multiple. LOAD is true if they are load instructions.
50926 MODE is the mode of memory operands. */
50929 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
50932 HOST_WIDE_INT offval_1, offval_2, msize;
50933 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
50937 mem_1 = operands[1];
50938 mem_2 = operands[3];
50939 reg_1 = operands[0];
50940 reg_2 = operands[2];
50944 mem_1 = operands[0];
50945 mem_2 = operands[2];
50946 reg_1 = operands[1];
50947 reg_2 = operands[3];
50950 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
50952 if (REGNO (reg_1) != REGNO (reg_2))
50955 /* Check if the addresses are in the form of [base+offset]. */
50956 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
50958 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
50961 /* Check if the bases are the same. */
50962 if (!rtx_equal_p (base_1, base_2))
50965 offval_1 = INTVAL (offset_1);
50966 offval_2 = INTVAL (offset_2);
50967 msize = GET_MODE_SIZE (mode);
50968 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
50969 if (offval_1 + msize != offval_2)
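/* E.g. (illustrative): two SImode accesses at [base + 4] and [base + 8]
   pass the check (4 + 4 == 8) and may be merged; accesses at [base + 4]
   and [base + 12] may not.  */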
50975 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
50978 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
50979 optimization_type opt_type)
50993 return opt_type == OPTIMIZE_FOR_SPEED;
50996 if (SSE_FLOAT_MODE_P (mode1)
50998 && !flag_trapping_math
51000 return opt_type == OPTIMIZE_FOR_SPEED;
51006 if (SSE_FLOAT_MODE_P (mode1)
51008 && !flag_trapping_math
51011 return opt_type == OPTIMIZE_FOR_SPEED;
51014 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
51021 /* Address space support.
51023 This is not "far pointers" in the 16-bit sense, but an easy way
51024 to use %fs and %gs segment prefixes. Therefore:
51026 (a) All address spaces have the same modes,
51027 (b) All address spaces have the same address forms,
51028 (c) While %fs and %gs are technically subsets of the generic
51029 address space, they are probably not subsets of each other.
51030 (d) Since we have no access to the segment base register values
51031 without resorting to a system call, we cannot convert a
51032 non-default address space to a default address space.
51033 Therefore we do not claim %fs or %gs are subsets of generic.
51035 Therefore we can (mostly) use the default hooks. */
51037 /* All use of segmentation is assumed to make address 0 valid. */
51040 ix86_addr_space_zero_address_valid (addr_space_t as)
51042 return as != ADDR_SPACE_GENERIC;
51046 ix86_init_libfuncs (void)
51050 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
51051 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
51055 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
51056 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
51060 darwin_rename_builtins ();
51064 /* Generate call to __divmoddi4. */
51067 ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
51069 rtx *quot_p, rtx *rem_p)
51071 rtx rem = assign_386_stack_local (mode, SLOT_TEMP);
51073 rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL,
51074 mode, op0, mode, op1, mode,
51075 XEXP (rem, 0), Pmode);

/* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
   FPU, assume that the fpcw is set to extended precision; when using
   only SSE, rounding is correct; when using both SSE and the FPU,
   the rounding precision is indeterminate, since either may be chosen
   apparently at random.  */

static enum flt_eval_method
ix86_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
	/* The fastest type to promote to will always be the native type,
	   whether that occurs with implicit excess precision or
	   otherwise.  */
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      case EXCESS_PRECISION_TYPE_STANDARD:
      case EXCESS_PRECISION_TYPE_IMPLICIT:
	/* Otherwise, the excess precision we want when we are
	   in a standards compliant mode, and the implicit precision we
	   provide would be identical were it not for the unpredictable
	   cases.  */
	if (!TARGET_80387)
	  return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
	else if (!TARGET_MIX_SSE_I387)
	  {
	    if (!TARGET_SSE_MATH)
	      return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
	    else if (TARGET_SSE2)
	      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
	  }

	/* If we are in standards compliant mode, but we know we will
	   calculate in unpredictable precision, return
	   FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
	   excess precision if the target can't guarantee it will honor
	   it.  */
	return (type == EXCESS_PRECISION_TYPE_STANDARD
		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
		: FLT_EVAL_METHOD_UNPREDICTABLE);
      default:
	gcc_unreachable ();
    }

  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
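
/* Worked example (illustrative, not part of the original file): given

     float a, b, c;
     float r = a * b + c;

   with -mfpmath=387 this hook yields
   FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE (FLT_EVAL_METHOD == 2), so
   the product and sum are computed in the 80-bit x87 format and only
   the final store rounds to float; with -mfpmath=sse -msse2 it yields
   FLT_EVAL_METHOD_PROMOTE_TO_FLOAT (FLT_EVAL_METHOD == 0) and the
   arithmetic is done in float throughout.  */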

/* Implement PUSH_ROUNDING.  On the 386, we have a pushw instruction
   that decrements the stack pointer by exactly 2 no matter what the
   position was; there is no pushb.

   But since the CIE data alignment factor on this arch is -4 for 32-bit
   targets and -8 for 64-bit targets, we need to make sure all stack
   pointer adjustments are multiples of 4 for 32-bit targets and 8 for
   64-bit targets.  */

poly_int64
ix86_push_rounding (poly_int64 bytes)
{
  return ROUND_UP (bytes, UNITS_PER_WORD);
}
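
/* For example (illustrative): on a 64-bit target UNITS_PER_WORD is 8,
   so a 2-byte pushw is accounted as ROUND_UP (2, 8) == 8 bytes of
   stack adjustment, keeping every adjustment a multiple of the CIE
   data alignment factor.  */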

/* Target-specific selftests.  */

#if CHECKING_P

namespace selftest {

/* Verify that hard regs are dumped as expected (in compact mode).  */

static void
ix86_test_dumping_hard_regs ()
{
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
}

/* Test dumping an insn with repeated references to the same SCRATCH,
   to verify the rtx_reuse code.  */

static void
ix86_test_dumping_memory_blockage ()
{
  set_new_first_and_last_insn (NULL, NULL);

  rtx pat = gen_memory_blockage ();
  rtx_reuse_manager r;
  r.preprocess (pat);

  /* Verify that the repeated references to the SCRATCH are dumped
     using reuse IDs: the first should be prefixed with a reuse ID,
     and the second should be dumped as a "reuse_rtx" of that ID.
     The expected string assumes Pmode == DImode.  */
  if (Pmode == DImode)
    ASSERT_RTL_DUMP_EQ_WITH_REUSE
      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
       "        (unspec:BLK [\n"
       "                (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
       "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
}

/* Verify loading an RTL dump; specifically a dump of copying
   a param on x86_64 from a hard reg into the frame.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_dump_fragment_1 ()
{
  rtl_dump_test t (SELFTEST_LOCATION,
                   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));

  rtx_insn *insn = get_insn_by_uid (1);
  ASSERT_EQ (INSN, GET_CODE (insn));

  /* The block structure and indentation here is purely for
     readability; it mirrors the structure of the rtx.  */
  tree mem_expr;
  {
    rtx pat = PATTERN (insn);
    ASSERT_EQ (SET, GET_CODE (pat));
    {
      rtx dest = SET_DEST (pat);
      ASSERT_EQ (MEM, GET_CODE (dest));
      /* Verify the "/c" was parsed.  */
      ASSERT_TRUE (RTX_FLAG (dest, call));
      ASSERT_EQ (SImode, GET_MODE (dest));
      {
        rtx addr = XEXP (dest, 0);
        ASSERT_EQ (PLUS, GET_CODE (addr));
        ASSERT_EQ (DImode, GET_MODE (addr));
        {
          rtx lhs = XEXP (addr, 0);
          /* Verify that the "frame" REG was consolidated.  */
          ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
        }
        {
          rtx rhs = XEXP (addr, 1);
          ASSERT_EQ (CONST_INT, GET_CODE (rhs));
          ASSERT_EQ (-4, INTVAL (rhs));
        }
      }
      /* Verify the "[1 i+0 S4 A32]" was parsed.  */
      ASSERT_EQ (1, MEM_ALIAS_SET (dest));
      /* "i" should have been handled by synthesizing a global int
         variable named "i".  */
      mem_expr = MEM_EXPR (dest);
      ASSERT_NE (mem_expr, NULL);
      ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
      ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
      ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
      ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
      /* "+0".  */
      ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
      ASSERT_EQ (0, MEM_OFFSET (dest));
      /* "S4".  */
      ASSERT_EQ (4, MEM_SIZE (dest));
      /* "A32".  */
      ASSERT_EQ (32, MEM_ALIGN (dest));
    }
    {
      rtx src = SET_SRC (pat);
      ASSERT_EQ (REG, GET_CODE (src));
      ASSERT_EQ (SImode, GET_MODE (src));
      ASSERT_EQ (5, REGNO (src));
      tree reg_expr = REG_EXPR (src);
      /* "i" here should point to the same var as for the MEM_EXPR.  */
      ASSERT_EQ (reg_expr, mem_expr);
    }
  }
}

/* Verify that the RTL loader copes with a call_insn dump.
   This test is target-specific since the dump contains a target-specific
   hard reg name.  */

static void
ix86_test_loading_call_insn ()
{
  /* The test dump includes register "xmm0", which requires TARGET_SSE
     to exist.  */
  if (!TARGET_SSE)
    return;

  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));

  rtx_insn *insn = get_insns ();
  ASSERT_EQ (CALL_INSN, GET_CODE (insn));

  /* "/j".  */
  ASSERT_TRUE (RTX_FLAG (insn, jump));

  rtx pat = PATTERN (insn);
  ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));

  /* Verify REG_NOTES.  */
  {
    /* "(expr_list:REG_CALL_DECL".  */
    ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
    rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
    ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));

    /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
    rtx_expr_list *note1 = note0->next ();
    ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));

    ASSERT_EQ (NULL, note1->next ());
  }

  /* Verify CALL_INSN_FUNCTION_USAGE.  */
  {
    /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
    rtx_expr_list *usage
      = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
    ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
    ASSERT_EQ (DFmode, GET_MODE (usage));
    ASSERT_EQ (USE, GET_CODE (usage->element ()));
    ASSERT_EQ (NULL, usage->next ());
  }
}

/* Verify that the RTL loader copes with a dump from print_rtx_function.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_full_dump ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_7 = get_insn_by_uid (7);
  ASSERT_EQ (INSN, GET_CODE (insn_7));
  ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));

  rtx_insn *insn_15 = get_insn_by_uid (15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}

/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
   In particular, verify that it correctly loads the 2nd operand.
   This test is target-specific since these are machine-specific
   operands (and enums).  */

static void
ix86_test_loading_unspec ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));

  ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  ASSERT_TRUE (cfun);

  /* Test of an UNSPEC.  */
  rtx_insn *insn = get_insns ();
  ASSERT_EQ (INSN, GET_CODE (insn));
  rtx set = single_set (insn);
  ASSERT_NE (NULL, set);
  rtx dst = SET_DEST (set);
  ASSERT_EQ (MEM, GET_CODE (dst));
  rtx src = SET_SRC (set);
  ASSERT_EQ (UNSPEC, GET_CODE (src));
  ASSERT_EQ (BLKmode, GET_MODE (src));
  ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));

  rtx v0 = XVECEXP (src, 0, 0);

  /* Verify that the two uses of the first SCRATCH have pointer
     equality.  */
  rtx scratch_a = XEXP (dst, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));

  rtx scratch_b = XEXP (v0, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));

  ASSERT_EQ (scratch_a, scratch_b);

  /* Verify that the two mems are thus treated as equal.  */
  ASSERT_TRUE (rtx_equal_p (dst, v0));

  /* Verify that the insn is recognized.  */
  ASSERT_NE (-1, recog_memoized (insn));

  /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
  insn = NEXT_INSN (insn);
  ASSERT_EQ (INSN, GET_CODE (insn));

  set = single_set (insn);
  ASSERT_NE (NULL, set);

  src = SET_SRC (set);
  ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
  ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
}

/* Run all target-specific selftests.  */

static void
ix86_run_selftests (void)
{
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}

} // namespace selftest

#endif /* CHECKING_P */

/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_VECTORIZE_BUILTIN_SCATTER
#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#else
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM FLAGS_REG
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION ix86_excess_precision
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_SPLIT_REDUCTION
#define TARGET_VECTORIZE_SPLIT_REDUCTION \
  ix86_split_reduction
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
#if 0
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
#endif

#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment

#undef TARGET_EMPTY_RECORD_P
#define TARGET_EMPTY_RECORD_P ix86_is_empty_record

#undef TARGET_WARN_PARAMETER_PASSING_ABI
#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */

struct gcc_target targetm = TARGET_INITIALIZER;
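
/* Illustrative note (not part of the original file): the rest of the
   compiler reaches the hooks defined above through this vector, e.g.

     if (targetm.calls.return_in_memory (type, fntype))
       ...

   resolves to ix86_return_in_memory via the TARGET_RETURN_IN_MEMORY
   definition near the top of this block.  */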

#include "gt-i386.h"