1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
/* Stack-probe limit: -1 means "no limit" unless the target configuration
   has already provided a value.  (The closing #endif was dropped from this
   listing and is restored here.)  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QI=0, HI=1, SI=2, DI=3, and any other mode (TImode) maps to 4.
   The final ": 4)" arm was truncated in this listing and is restored.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_* macro is the bit for
   one PROCESSOR_* enumerator; the x86_* tuning flags below are unions of
   these bits.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* GCC register numbers of the x86-64 integer value-return registers.
   Fixed: gcc regno 1 is RDX, not RDI (compare the parameter table above,
   where 1 is annotated RDX and 5 is RDI).  Missing braces restored.  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
	18 for %st(7)  (gcc regno = 15)
*/
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
/* Maximum number of cached stack-slot locals.  */
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
730 enum cmodel ix86_cmodel;
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
796 static rtx get_thread_pointer (int);
797 static rtx legitimize_tls_address (rtx, enum tls_model, int);
798 static void get_pc_thunk_name (char [32], unsigned int);
799 static rtx gen_push (rtx);
800 static int memory_address_length (rtx addr);
801 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
802 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
803 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
804 static void ix86_dump_ppro_packet (FILE *);
805 static void ix86_reorder_insn (rtx *, rtx *);
806 static struct machine_function * ix86_init_machine_status (void);
807 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
808 static int ix86_nsaved_regs (void);
809 static void ix86_emit_save_regs (void);
810 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
811 static void ix86_emit_restore_regs_using_mov (rtx, int, int);
812 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
813 static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
814 static void ix86_sched_reorder_ppro (rtx *, rtx *);
815 static HOST_WIDE_INT ix86_GOT_alias_set (void);
816 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
817 static rtx ix86_expand_aligntest (rtx, int);
818 static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
819 static int ix86_issue_rate (void);
820 static int ix86_adjust_cost (rtx, rtx, rtx, int);
821 static void ix86_sched_init (FILE *, int, int);
822 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
823 static int ix86_variable_issue (FILE *, int, rtx, int);
824 static int ia32_use_dfa_pipeline_interface (void);
825 static int ia32_multipass_dfa_lookahead (void);
826 static void ix86_init_mmx_sse_builtins (void);
827 static rtx x86_this_parameter (tree);
828 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
829 HOST_WIDE_INT, tree);
830 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
831 static void x86_file_start (void);
832 static void ix86_reorg (void);
833 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
834 static tree ix86_build_builtin_va_list (void);
838 rtx base, index, disp;
840 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
843 static int ix86_decompose_address (rtx, struct ix86_address *);
844 static int ix86_address_cost (rtx);
845 static bool ix86_cannot_force_const_mem (rtx);
846 static rtx ix86_delegitimize_address (rtx);
848 struct builtin_description;
849 static rtx ix86_expand_sse_comi (const struct builtin_description *,
851 static rtx ix86_expand_sse_compare (const struct builtin_description *,
853 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
854 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
855 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
856 static rtx ix86_expand_store_builtin (enum insn_code, tree);
857 static rtx safe_vector_operand (rtx, enum machine_mode);
858 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
859 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
860 enum rtx_code *, enum rtx_code *);
861 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
862 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
863 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
864 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
865 static int ix86_fp_comparison_cost (enum rtx_code code);
866 static unsigned int ix86_select_alt_pic_regnum (void);
867 static int ix86_save_reg (unsigned int, int);
868 static void ix86_compute_frame_layout (struct ix86_frame *);
869 static int ix86_comp_type_attributes (tree, tree);
870 static int ix86_function_regparm (tree, tree);
871 const struct attribute_spec ix86_attribute_table[];
872 static bool ix86_function_ok_for_sibcall (tree, tree);
873 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
874 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
875 static int ix86_value_regno (enum machine_mode);
876 static bool contains_128bit_aligned_vector_p (tree);
877 static bool ix86_ms_bitfield_layout_p (tree);
878 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
879 static int extended_reg_mentioned_1 (rtx *, void *);
880 static bool ix86_rtx_costs (rtx, int, int, int *);
881 static int min_insn_size (rtx);
882 static void k8_avoid_jump_misspredicts (void);
884 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
885 static void ix86_svr3_asm_out_constructor (rtx, int);
888 /* Register class used for passing given 64bit part of the argument.
889 These represent classes as documented by the PS ABI, with the exception
890 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
891 use SF or DFmode move instead of DImode to avoid reformatting penalties.
893 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
894 whenever possible (upper half does contain padding).
896 enum x86_64_reg_class
899 X86_64_INTEGER_CLASS,
900 X86_64_INTEGERSI_CLASS,
909 static const char * const x86_64_reg_class_name[] =
910 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
912 #define MAX_CLASSES 4
913 static int classify_argument (enum machine_mode, tree,
914 enum x86_64_reg_class [MAX_CLASSES], int);
915 static int examine_argument (enum machine_mode, tree, int, int *, int *);
916 static rtx construct_container (enum machine_mode, tree, int, int, int,
918 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
919 enum x86_64_reg_class);
921 /* Table of constants used by fldpi, fldln2, etc... */
922 static REAL_VALUE_TYPE ext_80387_constants_table [5];
923 static bool ext_80387_constants_init = 0;
924 static void init_ext_80387_constants (void);
926 /* Initialize the GCC target structure. */
927 #undef TARGET_ATTRIBUTE_TABLE
928 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
929 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
930 # undef TARGET_MERGE_DECL_ATTRIBUTES
931 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
934 #undef TARGET_COMP_TYPE_ATTRIBUTES
935 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
937 #undef TARGET_INIT_BUILTINS
938 #define TARGET_INIT_BUILTINS ix86_init_builtins
940 #undef TARGET_EXPAND_BUILTIN
941 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
943 #undef TARGET_ASM_FUNCTION_EPILOGUE
944 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
946 #undef TARGET_ASM_OPEN_PAREN
947 #define TARGET_ASM_OPEN_PAREN ""
948 #undef TARGET_ASM_CLOSE_PAREN
949 #define TARGET_ASM_CLOSE_PAREN ""
951 #undef TARGET_ASM_ALIGNED_HI_OP
952 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
953 #undef TARGET_ASM_ALIGNED_SI_OP
954 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
956 #undef TARGET_ASM_ALIGNED_DI_OP
957 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
960 #undef TARGET_ASM_UNALIGNED_HI_OP
961 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
962 #undef TARGET_ASM_UNALIGNED_SI_OP
963 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
964 #undef TARGET_ASM_UNALIGNED_DI_OP
965 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
967 #undef TARGET_SCHED_ADJUST_COST
968 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
969 #undef TARGET_SCHED_ISSUE_RATE
970 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
971 #undef TARGET_SCHED_VARIABLE_ISSUE
972 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
973 #undef TARGET_SCHED_INIT
974 #define TARGET_SCHED_INIT ix86_sched_init
975 #undef TARGET_SCHED_REORDER
976 #define TARGET_SCHED_REORDER ix86_sched_reorder
977 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
978 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
979 ia32_use_dfa_pipeline_interface
980 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
981 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
982 ia32_multipass_dfa_lookahead
984 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
985 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
988 #undef TARGET_HAVE_TLS
989 #define TARGET_HAVE_TLS true
991 #undef TARGET_CANNOT_FORCE_CONST_MEM
992 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
994 #undef TARGET_DELEGITIMIZE_ADDRESS
995 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
997 #undef TARGET_MS_BITFIELD_LAYOUT_P
998 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1000 #undef TARGET_ASM_OUTPUT_MI_THUNK
1001 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1002 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1003 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1005 #undef TARGET_ASM_FILE_START
1006 #define TARGET_ASM_FILE_START x86_file_start
1008 #undef TARGET_RTX_COSTS
1009 #define TARGET_RTX_COSTS ix86_rtx_costs
1010 #undef TARGET_ADDRESS_COST
1011 #define TARGET_ADDRESS_COST ix86_address_cost
1013 #undef TARGET_MACHINE_DEPENDENT_REORG
1014 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1016 #undef TARGET_BUILD_BUILTIN_VA_LIST
1017 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1019 struct gcc_target targetm = TARGET_INITIALIZER;
1021 /* The svr4 ABI for the i386 says that records and unions are returned in memory. */
1023 #ifndef DEFAULT_PCC_STRUCT_RETURN
1024 #define DEFAULT_PCC_STRUCT_RETURN 1
1027 /* Sometimes certain combinations of command options do not make
1028 sense on a particular target machine. You can define a macro
1029 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1030 defined, is executed once just after all the command options have
1033 Don't use this macro to turn on various extra optimizations for
1034 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1037 override_options (void)
1040 /* Comes from final.c -- no real reason to change it. */
1041 #define MAX_CODE_ALIGN 16
1045 const struct processor_costs *cost; /* Processor costs */
1046 const int target_enable; /* Target flags to enable. */
1047 const int target_disable; /* Target flags to disable. */
1048 const int align_loop; /* Default alignments. */
1049 const int align_loop_max_skip;
1050 const int align_jump;
1051 const int align_jump_max_skip;
1052 const int align_func;
1054 const processor_target_table[PROCESSOR_max] =
1056 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1057 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1058 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1059 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1060 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1061 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1062 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1063 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1066 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1069 const char *const name; /* processor name or nickname. */
1070 const enum processor_type processor;
1071 const enum pta_flags
1076 PTA_PREFETCH_SSE = 8,
1082 const processor_alias_table[] =
1084 {"i386", PROCESSOR_I386, 0},
1085 {"i486", PROCESSOR_I486, 0},
1086 {"i586", PROCESSOR_PENTIUM, 0},
1087 {"pentium", PROCESSOR_PENTIUM, 0},
1088 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1089 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1090 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1091 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1092 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1093 {"i686", PROCESSOR_PENTIUMPRO, 0},
1094 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1095 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1096 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1097 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1098 PTA_MMX | PTA_PREFETCH_SSE},
1099 {"k6", PROCESSOR_K6, PTA_MMX},
1100 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1101 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1102 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1104 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1105 | PTA_3DNOW | PTA_3DNOW_A},
1106 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1107 | PTA_3DNOW_A | PTA_SSE},
1108 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1109 | PTA_3DNOW_A | PTA_SSE},
1110 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1111 | PTA_3DNOW_A | PTA_SSE},
1112 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1113 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1116 int const pta_size = ARRAY_SIZE (processor_alias_table);
1118 /* Set the default values for switches whose default depends on TARGET_64BIT
1119 in case they weren't overwritten by command line options. */
1122 if (flag_omit_frame_pointer == 2)
1123 flag_omit_frame_pointer = 1;
1124 if (flag_asynchronous_unwind_tables == 2)
1125 flag_asynchronous_unwind_tables = 1;
1126 if (flag_pcc_struct_return == 2)
1127 flag_pcc_struct_return = 0;
1131 if (flag_omit_frame_pointer == 2)
1132 flag_omit_frame_pointer = 0;
1133 if (flag_asynchronous_unwind_tables == 2)
1134 flag_asynchronous_unwind_tables = 0;
1135 if (flag_pcc_struct_return == 2)
1136 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1139 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1140 SUBTARGET_OVERRIDE_OPTIONS;
1143 if (!ix86_tune_string && ix86_arch_string)
1144 ix86_tune_string = ix86_arch_string;
1145 if (!ix86_tune_string)
1146 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1147 if (!ix86_arch_string)
1148 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1150 if (ix86_cmodel_string != 0)
1152 if (!strcmp (ix86_cmodel_string, "small"))
1153 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1155 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1156 else if (!strcmp (ix86_cmodel_string, "32"))
1157 ix86_cmodel = CM_32;
1158 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1159 ix86_cmodel = CM_KERNEL;
1160 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1161 ix86_cmodel = CM_MEDIUM;
1162 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1163 ix86_cmodel = CM_LARGE;
1165 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1169 ix86_cmodel = CM_32;
1171 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1173 if (ix86_asm_string != 0)
1175 if (!strcmp (ix86_asm_string, "intel"))
1176 ix86_asm_dialect = ASM_INTEL;
1177 else if (!strcmp (ix86_asm_string, "att"))
1178 ix86_asm_dialect = ASM_ATT;
1180 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1182 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1183 error ("code model `%s' not supported in the %s bit mode",
1184 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1185 if (ix86_cmodel == CM_LARGE)
1186 sorry ("code model `large' not supported yet");
1187 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1188 sorry ("%i-bit mode not compiled in",
1189 (target_flags & MASK_64BIT) ? 64 : 32);
1191 for (i = 0; i < pta_size; i++)
1192 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1194 ix86_arch = processor_alias_table[i].processor;
1195 /* Default cpu tuning to the architecture. */
1196 ix86_tune = ix86_arch;
1197 if (processor_alias_table[i].flags & PTA_MMX
1198 && !(target_flags_explicit & MASK_MMX))
1199 target_flags |= MASK_MMX;
1200 if (processor_alias_table[i].flags & PTA_3DNOW
1201 && !(target_flags_explicit & MASK_3DNOW))
1202 target_flags |= MASK_3DNOW;
1203 if (processor_alias_table[i].flags & PTA_3DNOW_A
1204 && !(target_flags_explicit & MASK_3DNOW_A))
1205 target_flags |= MASK_3DNOW_A;
1206 if (processor_alias_table[i].flags & PTA_SSE
1207 && !(target_flags_explicit & MASK_SSE))
1208 target_flags |= MASK_SSE;
1209 if (processor_alias_table[i].flags & PTA_SSE2
1210 && !(target_flags_explicit & MASK_SSE2))
1211 target_flags |= MASK_SSE2;
1212 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1213 x86_prefetch_sse = true;
1214 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1215 error ("CPU you selected does not support x86-64 instruction set");
1220 error ("bad value (%s) for -march= switch", ix86_arch_string);
1222 for (i = 0; i < pta_size; i++)
1223 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1225 ix86_tune = processor_alias_table[i].processor;
1226 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1227 error ("CPU you selected does not support x86-64 instruction set");
1230 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1231 x86_prefetch_sse = true;
1233 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1236 ix86_cost = &size_cost;
1238 ix86_cost = processor_target_table[ix86_tune].cost;
1239 target_flags |= processor_target_table[ix86_tune].target_enable;
1240 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1242 /* Arrange to set up i386_stack_locals for all functions. */
1243 init_machine_status = ix86_init_machine_status;
1245 /* Validate -mregparm= value. */
1246 if (ix86_regparm_string)
1248 i = atoi (ix86_regparm_string);
1249 if (i < 0 || i > REGPARM_MAX)
1250 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1256 ix86_regparm = REGPARM_MAX;
1258 /* If the user has provided any of the -malign-* options,
1259 warn and use that value only if -falign-* is not set.
1260 Remove this code in GCC 3.2 or later. */
1261 if (ix86_align_loops_string)
1263 warning ("-malign-loops is obsolete, use -falign-loops");
1264 if (align_loops == 0)
1266 i = atoi (ix86_align_loops_string);
1267 if (i < 0 || i > MAX_CODE_ALIGN)
1268 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1270 align_loops = 1 << i;
1274 if (ix86_align_jumps_string)
1276 warning ("-malign-jumps is obsolete, use -falign-jumps");
1277 if (align_jumps == 0)
1279 i = atoi (ix86_align_jumps_string);
1280 if (i < 0 || i > MAX_CODE_ALIGN)
1281 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1283 align_jumps = 1 << i;
1287 if (ix86_align_funcs_string)
1289 warning ("-malign-functions is obsolete, use -falign-functions");
1290 if (align_functions == 0)
1292 i = atoi (ix86_align_funcs_string);
1293 if (i < 0 || i > MAX_CODE_ALIGN)
1294 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1296 align_functions = 1 << i;
1300 /* Default align_* from the processor table. */
1301 if (align_loops == 0)
1303 align_loops = processor_target_table[ix86_tune].align_loop;
1304 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1306 if (align_jumps == 0)
1308 align_jumps = processor_target_table[ix86_tune].align_jump;
1309 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1311 if (align_functions == 0)
1313 align_functions = processor_target_table[ix86_tune].align_func;
1316 /* Validate -mpreferred-stack-boundary= value, or provide default.
1317 The default of 128 bits is for Pentium III's SSE __m128, but we
1318 don't want additional code to keep the stack aligned when
1319 optimizing for code size. */
1320 ix86_preferred_stack_boundary = (optimize_size
1321 ? TARGET_64BIT ? 128 : 32
1323 if (ix86_preferred_stack_boundary_string)
1325 i = atoi (ix86_preferred_stack_boundary_string);
1326 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1327 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1328 TARGET_64BIT ? 4 : 2);
1330 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1333 /* Validate -mbranch-cost= value, or provide default. */
1334 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1335 if (ix86_branch_cost_string)
1337 i = atoi (ix86_branch_cost_string);
1339 error ("-mbranch-cost=%d is not between 0 and 5", i);
1341 ix86_branch_cost = i;
1344 if (ix86_tls_dialect_string)
1346 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1347 ix86_tls_dialect = TLS_DIALECT_GNU;
1348 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1349 ix86_tls_dialect = TLS_DIALECT_SUN;
1351 error ("bad value (%s) for -mtls-dialect= switch",
1352 ix86_tls_dialect_string);
1355 /* Keep nonleaf frame pointers. */
1356 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1357 flag_omit_frame_pointer = 1;
1359 /* If we're doing fast math, we don't care about comparison order
1360 wrt NaNs. This lets us use a shorter comparison sequence. */
1361 if (flag_unsafe_math_optimizations)
1362 target_flags &= ~MASK_IEEE_FP;
1364 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1365 since the insns won't need emulation. */
1366 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1367 target_flags &= ~MASK_NO_FANCY_MATH_387;
1369 /* Turn on SSE2 builtins for -mpni. */
1371 target_flags |= MASK_SSE2;
1373 /* Turn on SSE builtins for -msse2. */
1375 target_flags |= MASK_SSE;
1377 target_flags |= (MASK_128BIT_LONG_DOUBLE);
1380 if (TARGET_ALIGN_DOUBLE)
1381 error ("-malign-double makes no sense in the 64bit mode");
1383 error ("-mrtd calling convention not supported in the 64bit mode");
1384 /* Enable by default the SSE and MMX builtins. */
1385 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1386 ix86_fpmath = FPMATH_SSE;
1390 ix86_fpmath = FPMATH_387;
1391 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1392 when the programmer takes care to keep the stack from being destroyed. */
1393 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1394 target_flags |= MASK_NO_RED_ZONE;
1397 if (ix86_fpmath_string != 0)
1399 if (! strcmp (ix86_fpmath_string, "387"))
1400 ix86_fpmath = FPMATH_387;
1401 else if (! strcmp (ix86_fpmath_string, "sse"))
1405 warning ("SSE instruction set disabled, using 387 arithmetics");
1406 ix86_fpmath = FPMATH_387;
1409 ix86_fpmath = FPMATH_SSE;
1411 else if (! strcmp (ix86_fpmath_string, "387,sse")
1412 || ! strcmp (ix86_fpmath_string, "sse,387"))
1416 warning ("SSE instruction set disabled, using 387 arithmetics");
1417 ix86_fpmath = FPMATH_387;
1419 else if (!TARGET_80387)
1421 warning ("387 instruction set disabled, using SSE arithmetics");
1422 ix86_fpmath = FPMATH_SSE;
1425 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1428 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1431 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1435 target_flags |= MASK_MMX;
1436 x86_prefetch_sse = true;
1439 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1442 target_flags |= MASK_MMX;
1443 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1444 extensions it adds. */
1445 if (x86_3dnow_a & (1 << ix86_arch))
1446 target_flags |= MASK_3DNOW_A;
1448 if ((x86_accumulate_outgoing_args & TUNEMASK)
1449 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1451 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1453 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1456 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1457 p = strchr (internal_label_prefix, 'X');
1458 internal_label_prefix_len = p - internal_label_prefix;
/* Set per-optimization-level defaults for LEVEL (-O) and SIZE (-Os).
   Runs before the target (32/64-bit) is known, so TARGET_64BIT-dependent
   flags are set to the sentinel value 2 and resolved later in
   override_options.
   NOTE(review): interior lines (braces, the `if (level > 1)` style guard
   around flag_schedule_insns) are elided in this view; code is kept
   byte-identical.  */
1464 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1466 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1467 make the problem with not enough registers even worse. */
1468 #ifdef INSN_SCHEDULING
1470 flag_schedule_insns = 0;
1473 /* The default values of these switches depend on the TARGET_64BIT
1474 that is not known at this moment. Mark these values with 2 and
1475 let the user override these. In case there is no command line option
1476 specifying them, we will set the defaults in override_options. */
1478 flag_omit_frame_pointer = 2;
1479 flag_pcc_struct_return = 2;
1480 flag_asynchronous_unwind_tables = 2;
1483 /* Table of valid machine attributes. Each row gives the attribute name,
   its min/max argument counts, the contexts it may apply to
   (decl/type/function-type), and the handler that validates it; see
   struct attribute_spec for the field layout. */
1484 const struct attribute_spec ix86_attribute_table[] =
1486 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1487 /* Stdcall attribute says callee is responsible for popping arguments
1488 if they are not variable. */
1489 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1490 /* Fastcall attribute says callee is responsible for popping arguments
1491 if they are not variable. */
1492 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1493 /* Cdecl attribute says the callee is a normal C declaration */
1494 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1495 /* Regparm attribute specifies how many integer arguments are to be
1496 passed in registers. */
1497 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1498 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1499 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1500 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1501 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* NOTE(review): the matching #endif (orig line 1502) is elided in this
   view of the file. */
1503 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1504 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
/* Terminator entry -- must remain last. */
1505 { NULL, 0, 0, false, false, false, NULL }
1508 /* Decide whether we can make a sibling call to a function. DECL is the
1509 declaration of the function being targeted by the call and EXP is the
1510 CALL_EXPR representing the call.
   NOTE(review): the return type line, braces and the individual
   `return false/true` statements are elided in this view; code is kept
   byte-identical. */
1513 ix86_function_ok_for_sibcall (tree decl, tree exp)
1515 /* If we are generating position-independent code, we cannot sibcall
1516 optimize any indirect call, or a direct call to a global function,
1517 as the PLT requires %ebx be live. */
1518 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1521 /* If we are returning floats on the 80387 register stack, we cannot
1522 make a sibcall from a function that doesn't return a float to a
1523 function that does or, conversely, from a function that does return
1524 a float to a function that doesn't; the necessary stack adjustment
1525 would not be executed. */
1526 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1527 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1530 /* If this call is indirect, we'll need to be able to use a call-clobbered
1531 register for the address of the target function. Make sure that all
1532 such registers are not used for passing parameters. */
1533 if (!decl && !TARGET_64BIT)
1537 /* We're looking at the CALL_EXPR, we need the type of the function. */
1538 type = TREE_OPERAND (exp, 0); /* pointer expression */
1539 type = TREE_TYPE (type); /* pointer type */
1540 type = TREE_TYPE (type); /* function type */
/* With regparm >= 3 all call-clobbered integer registers may carry
   arguments, leaving none free for the indirect call address. */
1542 if (ix86_function_regparm (type, NULL) >= 3)
1544 /* ??? Need to count the actual number of registers to be used,
1545 not the possible number of registers. Fix later. */
1550 /* Otherwise okay. That also includes certain types of indirect calls. */
1554 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1555 arguments as in struct attribute_spec.handler. Rejects the attribute
   (setting *NO_ADD_ATTRS) when applied to a non-function, and diagnoses
   mutually exclusive calling-convention attributes.
   NOTE(review): braces and several lines (including the 64-bit
   "attribute ignored" path's condition) are elided in this view. */
1557 ix86_handle_cdecl_attribute (tree *node, tree name,
1558 tree args ATTRIBUTE_UNUSED,
1559 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* The attribute only makes sense on function types (or decls that carry
   one); warn and drop it elsewhere. */
1561 if (TREE_CODE (*node) != FUNCTION_TYPE
1562 && TREE_CODE (*node) != METHOD_TYPE
1563 && TREE_CODE (*node) != FIELD_DECL
1564 && TREE_CODE (*node) != TYPE_DECL)
1566 warning ("`%s' attribute only applies to functions",
1567 IDENTIFIER_POINTER (name));
1568 *no_add_attrs = true;
/* fastcall conflicts with both stdcall and regparm. */
1572 if (is_attribute_p ("fastcall", name))
1574 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1576 error ("fastcall and stdcall attributes are not compatible");
1578 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1580 error ("fastcall and regparm attributes are not compatible");
/* And symmetrically: stdcall conflicts with an existing fastcall. */
1583 else if (is_attribute_p ("stdcall", name))
1585 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1587 error ("fastcall and stdcall attributes are not compatible");
1594 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1595 *no_add_attrs = true;
1601 /* Handle a "regparm" attribute;
1602 arguments as in struct attribute_spec.handler. Validates that the
   single argument is an integer constant in [0, REGPARM_MAX] and that
   regparm is not combined with fastcall; sets *NO_ADD_ATTRS to drop the
   attribute on any validation failure.
   NOTE(review): braces and some lines are elided in this view. */
1604 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1605 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Only meaningful on function types; warn and drop elsewhere. */
1607 if (TREE_CODE (*node) != FUNCTION_TYPE
1608 && TREE_CODE (*node) != METHOD_TYPE
1609 && TREE_CODE (*node) != FIELD_DECL
1610 && TREE_CODE (*node) != TYPE_DECL)
1612 warning ("`%s' attribute only applies to functions",
1613 IDENTIFIER_POINTER (name));
1614 *no_add_attrs = true;
/* Validate the (single) argument value. */
1620 cst = TREE_VALUE (args);
1621 if (TREE_CODE (cst) != INTEGER_CST)
1623 warning ("`%s' attribute requires an integer constant argument",
1624 IDENTIFIER_POINTER (name));
1625 *no_add_attrs = true;
1627 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1629 warning ("argument to `%s' attribute larger than %d",
1630 IDENTIFIER_POINTER (name), REGPARM_MAX);
1631 *no_add_attrs = true;
/* regparm cannot be combined with fastcall. */
1634 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1636 error ("fastcall and regparm attributes are not compatible");
1643 /* Return 0 if the attributes for two types are incompatible, 1 if they
1644 are compatible, and 2 if they are nearly compatible (which causes a
1645 warning to be generated).
   NOTE(review): the early-return and final-return lines are elided in
   this view; code is kept byte-identical. */
1648 ix86_comp_type_attributes (tree type1, tree type2)
1650 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default convention is stdcall, so the attribute whose
   presence must match is the opposite one. */
1651 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Non-function types have no calling-convention attributes to compare. */
1653 if (TREE_CODE (type1) != FUNCTION_TYPE)
1656 /* Check for mismatched fastcall types */
1657 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1658 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1661 /* Check for mismatched return types (cdecl vs stdcall). */
1662 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1663 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1668 /* Return the regparm value for a function with the indicated TYPE and DECL.
1669 DECL may be NULL when calling function indirectly
1670 or considering a libcall. Precedence: an explicit regparm attribute,
   then fastcall, then (for local 32-bit functions under
   -funit-at-a-time without profiling) an automatic register convention.
   NOTE(review): braces, the attribute-null checks and the return
   statements are elided in this view; code is kept byte-identical. */
1673 ix86_function_regparm (tree type, tree decl)
1676 int regparm = ix86_regparm;
1677 bool user_convention = false;
/* An explicit regparm attribute overrides the -mregparm default. */
1681 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1684 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1685 user_convention = true;
/* fastcall also counts as a user-specified convention. */
1688 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1691 user_convention = true;
1694 /* Use register calling convention for local functions when possible. */
1695 if (!TARGET_64BIT && !user_convention && decl
1696 && flag_unit_at_a_time && !profile_flag)
1698 struct cgraph_local_info *i = cgraph_local_info (decl);
1701 /* We can't use regparm(3) for nested functions as these use
1702 static chain pointer in third argument. */
1703 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1713 /* Return true if EAX is live at the start of the function. Used by
1714 ix86_expand_prologue to determine if we need special help before
1715 calling allocate_stack_worker.
   NOTE(review): the return type line and braces are elided in this
   view; code is kept byte-identical. */
1718 ix86_eax_live_at_start_p (void)
1720 /* Cheat. Don't bother working forward from ix86_function_regparm
1721 to the function type to whether an actual argument is located in
1722 eax. Instead just look at cfg info, which is still close enough
1723 to correct at this point. This gives false positives for broken
1724 functions that might use uninitialized data that happens to be
1725 allocated in eax, but who cares? */
/* Register 0 is EAX on this target; test its liveness at the entry
   block's end. */
1726 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1729 /* Value is the number of bytes of arguments automatically
1730 popped when returning from a subroutine call.
1731 FUNDECL is the declaration node of the function (as a tree),
1732 FUNTYPE is the data type of the function (as a tree),
1733 or for a library call it is an identifier node for the subroutine name.
1734 SIZE is the number of bytes of arguments passed on the stack.
1736 On the 80386, the RTD insn may be used to pop them if the number
1737 of args is fixed, but if the number is variable then the caller
1738 must pop them all. RTD can't be used for library calls now
1739 because the library is compiled with the Unix compiler.
1740 Use of RTD is a selectable option, since it is incompatible with
1741 standard Unix calling sequences. If the option is not selected,
1742 the caller must always pop the args.
1744 The attribute stdcall is equivalent to RTD on a per module basis.
   NOTE(review): several lines (return statements, closing braces, part
   of the conditions) are elided in this view; code is kept
   byte-identical. */
1747 ix86_return_pops_args (tree fundecl, tree funtype, int size)
/* -mrtd applies only to real functions, not library-call identifiers. */
1749 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1751 /* Cdecl functions override -mrtd, and never pop the stack. */
1752 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1754 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1755 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1756 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* A fixed argument list ends in void_type_node; only then may the
   callee pop SIZE bytes. */
1760 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1761 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1762 == void_type_node)))
1766 /* Lose any fake structure return argument if it is passed on the stack. */
1767 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1770 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden struct-return pointer occupies one word on the stack. */
1773 return GET_MODE_SIZE (Pmode);
1779 /* Argument support functions. */
1781 /* Return true when register may be used to pass function parameters.
   32-bit: the low REGPARM_MAX integer registers, plus non-fixed SSE
   registers when SSE is enabled. 64-bit: SSE registers and the
   x86-64 integer parameter registers (RAX is also a hidden va_arg
   argument). NOTE(review): the 32/64-bit branch structure, braces and
   return statements are elided in this view. */
1783 ix86_function_arg_regno_p (int regno)
1787 return (regno < REGPARM_MAX
1788 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1789 if (SSE_REGNO_P (regno) && TARGET_SSE)
1791 /* RAX is used as hidden argument to va_arg functions. */
/* Otherwise scan the 64-bit integer parameter register list. */
1794 for (i = 0; i < REGPARM_MAX; i++)
1795 if (regno == x86_64_int_parameter_registers[i])
1800 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1801 for a call to a function whose data type is FNTYPE.
1802 For a library call, FNTYPE is 0. Fills in the register budgets
   (integer and SSE) and whether the callee may take variable arguments.
   NOTE(review): braces, the zeroing of *CUM, the fastcall nregs
   assignment and some conditions are elided in this view; code is kept
   byte-identical. */
1805 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1806 tree fntype, /* tree ptr for function decl */
1807 rtx libname, /* SYMBOL_REF of library name or 0 */
1810 static CUMULATIVE_ARGS zero_cum;
1811 tree param, next_param;
/* Optional tracing under -mdebug-arg. */
1813 if (TARGET_DEBUG_ARG)
1815 fprintf (stderr, "\ninit_cumulative_args (");
1817 fprintf (stderr, "fntype code = %s, ret code = %s",
1818 tree_code_name[(int) TREE_CODE (fntype)],
1819 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1821 fprintf (stderr, "no fntype");
1824 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1829 /* Set up the number of registers to use for passing arguments. */
1831 cum->nregs = ix86_function_regparm (fntype, fndecl);
1833 cum->nregs = ix86_regparm;
1834 cum->sse_nregs = SSE_REGPARM_MAX;
1835 cum->maybe_vaarg = false;
1837 /* Use ecx and edx registers if function has fastcall attribute */
1838 if (fntype && !TARGET_64BIT)
1840 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1848 /* Determine if this function has variable arguments. This is
1849 indicated by the last argument being 'void_type_node' if there
1850 are no variable arguments. If there are variable arguments, then
1851 we won't pass anything in registers */
/* Walk the argument-type list looking at its final entry. */
1855 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1856 param != 0; param = next_param)
1858 next_param = TREE_CHAIN (param);
1859 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1866 cum->maybe_vaarg = true;
/* No prototype at all also means we must assume varargs. */
1870 if ((!fntype && !libname)
1871 || (fntype && !TYPE_ARG_TYPES (fntype)))
1872 cum->maybe_vaarg = 1;
1874 if (TARGET_DEBUG_ARG)
1875 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1880 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1881 of this code is to classify each 8bytes of incoming argument by the register
1882 class and assign registers accordingly. */
1884 /* Return the union class of CLASS1 and CLASS2.
1885 See the x86-64 PS ABI for details. Implements the psABI's
   classification merge rules #1..#6 in order.
   NOTE(review): the return statements for rules #1 and #2 and the
   braces are elided in this view; code is kept byte-identical. */
1887 static enum x86_64_reg_class
1888 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1890 /* Rule #1: If both classes are equal, this is the resulting class. */
1891 if (class1 == class2)
1894 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
   the other class. */
1896 if (class1 == X86_64_NO_CLASS)
1898 if (class2 == X86_64_NO_CLASS)
1901 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1902 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1903 return X86_64_MEMORY_CLASS;
1905 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* Merging an SImode integer half with an SFmode SSE half still fits in
   an SImode integer move, so keep the cheaper INTEGERSI class. */
1906 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1907 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1908 return X86_64_INTEGERSI_CLASS;
1909 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1910 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1911 return X86_64_INTEGER_CLASS;
1913 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1914 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1915 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1916 return X86_64_MEMORY_CLASS;
1918 /* Rule #6: Otherwise class SSE is used. */
1919 return X86_64_SSE_CLASS;
1922 /* Classify the argument of type TYPE and mode MODE.
1923 CLASSES will be filled by the register class used to pass each word
1924 of the operand. The number of words is returned. In case the parameter
1925 should be passed in memory, 0 is returned. As a special case for zero
1926 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1928 BIT_OFFSET is used internally for handling records and specifies offset
1929 of the offset in bits modulo 256 to avoid overflow cases.
1931 See the x86-64 PS ABI for details.
1935 classify_argument (enum machine_mode mode, tree type,
1936 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1939 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1940 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1942 /* Variable sized entities are always passed/returned in memory. */
1946 if (mode != VOIDmode
1947 && MUST_PASS_IN_STACK (mode, type))
1950 if (type && AGGREGATE_TYPE_P (type))
1954 enum x86_64_reg_class subclasses[MAX_CLASSES];
1956 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1960 for (i = 0; i < words; i++)
1961 classes[i] = X86_64_NO_CLASS;
1963 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1964 signalize memory class, so handle it as special case. */
1967 classes[0] = X86_64_NO_CLASS;
1971 /* Classify each field of record and merge classes. */
1972 if (TREE_CODE (type) == RECORD_TYPE)
1974 /* For classes first merge in the field of the subclasses. */
1975 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1977 tree bases = TYPE_BINFO_BASETYPES (type);
1978 int n_bases = TREE_VEC_LENGTH (bases);
1981 for (i = 0; i < n_bases; ++i)
1983 tree binfo = TREE_VEC_ELT (bases, i);
1985 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1986 tree type = BINFO_TYPE (binfo);
1988 num = classify_argument (TYPE_MODE (type),
1990 (offset + bit_offset) % 256);
1993 for (i = 0; i < num; i++)
1995 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1997 merge_classes (subclasses[i], classes[i + pos]);
2001 /* And now merge the fields of structure. */
2002 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2004 if (TREE_CODE (field) == FIELD_DECL)
2008 /* Bitfields are always classified as integer. Handle them
2009 early, since later code would consider them to be
2010 misaligned integers. */
2011 if (DECL_BIT_FIELD (field))
2013 for (i = int_bit_position (field) / 8 / 8;
2014 i < (int_bit_position (field)
2015 + tree_low_cst (DECL_SIZE (field), 0)
2018 merge_classes (X86_64_INTEGER_CLASS,
2023 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2024 TREE_TYPE (field), subclasses,
2025 (int_bit_position (field)
2026 + bit_offset) % 256);
2029 for (i = 0; i < num; i++)
2032 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2034 merge_classes (subclasses[i], classes[i + pos]);
2040 /* Arrays are handled as small records. */
2041 else if (TREE_CODE (type) == ARRAY_TYPE)
2044 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2045 TREE_TYPE (type), subclasses, bit_offset);
2049 /* The partial classes are now full classes. */
2050 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2051 subclasses[0] = X86_64_SSE_CLASS;
2052 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2053 subclasses[0] = X86_64_INTEGER_CLASS;
2055 for (i = 0; i < words; i++)
2056 classes[i] = subclasses[i % num];
2058 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2059 else if (TREE_CODE (type) == UNION_TYPE
2060 || TREE_CODE (type) == QUAL_UNION_TYPE)
2062 /* For classes first merge in the field of the subclasses. */
2063 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2065 tree bases = TYPE_BINFO_BASETYPES (type);
2066 int n_bases = TREE_VEC_LENGTH (bases);
2069 for (i = 0; i < n_bases; ++i)
2071 tree binfo = TREE_VEC_ELT (bases, i);
2073 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2074 tree type = BINFO_TYPE (binfo);
2076 num = classify_argument (TYPE_MODE (type),
2078 (offset + (bit_offset % 64)) % 256);
2081 for (i = 0; i < num; i++)
2083 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2085 merge_classes (subclasses[i], classes[i + pos]);
2089 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2091 if (TREE_CODE (field) == FIELD_DECL)
2094 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2095 TREE_TYPE (field), subclasses,
2099 for (i = 0; i < num; i++)
2100 classes[i] = merge_classes (subclasses[i], classes[i]);
2104 else if (TREE_CODE (type) == SET_TYPE)
2108 classes[0] = X86_64_INTEGERSI_CLASS;
2111 else if (bytes <= 8)
2113 classes[0] = X86_64_INTEGER_CLASS;
2116 else if (bytes <= 12)
2118 classes[0] = X86_64_INTEGER_CLASS;
2119 classes[1] = X86_64_INTEGERSI_CLASS;
2124 classes[0] = X86_64_INTEGER_CLASS;
2125 classes[1] = X86_64_INTEGER_CLASS;
2132 /* Final merger cleanup. */
2133 for (i = 0; i < words; i++)
2135 /* If one class is MEMORY, everything should be passed in
2137 if (classes[i] == X86_64_MEMORY_CLASS)
2140 /* The X86_64_SSEUP_CLASS should be always preceded by
2141 X86_64_SSE_CLASS. */
2142 if (classes[i] == X86_64_SSEUP_CLASS
2143 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2144 classes[i] = X86_64_SSE_CLASS;
2146 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2147 if (classes[i] == X86_64_X87UP_CLASS
2148 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2149 classes[i] = X86_64_SSE_CLASS;
2154 /* Compute alignment needed. We align all types to natural boundaries with
2155 exception of XFmode that is aligned to 64bits. */
2156 if (mode != VOIDmode && mode != BLKmode)
2158 int mode_alignment = GET_MODE_BITSIZE (mode);
2161 mode_alignment = 128;
2162 else if (mode == XCmode)
2163 mode_alignment = 256;
2164 /* Misaligned fields are always returned in memory. */
2165 if (bit_offset % mode_alignment)
2169 /* Classification of atomic types. */
2179 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2180 classes[0] = X86_64_INTEGERSI_CLASS;
2182 classes[0] = X86_64_INTEGER_CLASS;
2187 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2190 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2191 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2194 if (!(bit_offset % 64))
2195 classes[0] = X86_64_SSESF_CLASS;
2197 classes[0] = X86_64_SSE_CLASS;
2200 classes[0] = X86_64_SSEDF_CLASS;
2203 classes[0] = X86_64_X87_CLASS;
2204 classes[1] = X86_64_X87UP_CLASS;
2207 classes[0] = X86_64_INTEGER_CLASS;
2208 classes[1] = X86_64_INTEGER_CLASS;
2211 classes[0] = X86_64_X87_CLASS;
2212 classes[1] = X86_64_X87UP_CLASS;
2213 classes[2] = X86_64_X87_CLASS;
2214 classes[3] = X86_64_X87UP_CLASS;
2217 classes[0] = X86_64_SSEDF_CLASS;
2218 classes[1] = X86_64_SSEDF_CLASS;
2221 classes[0] = X86_64_SSE_CLASS;
2229 classes[0] = X86_64_SSE_CLASS;
2230 classes[1] = X86_64_SSEUP_CLASS;
2245 /* Examine the argument and return set number of register required in each
2246 class. Return 0 iff parameter should be passed in memory. */
2248 examine_argument (enum machine_mode mode, tree type, int in_return,
2249 int *int_nregs, int *sse_nregs)
2251 enum x86_64_reg_class class[MAX_CLASSES];
2252 int n = classify_argument (mode, type, class, 0);
2258 for (n--; n >= 0; n--)
2261 case X86_64_INTEGER_CLASS:
2262 case X86_64_INTEGERSI_CLASS:
2265 case X86_64_SSE_CLASS:
2266 case X86_64_SSESF_CLASS:
2267 case X86_64_SSEDF_CLASS:
2270 case X86_64_NO_CLASS:
2271 case X86_64_SSEUP_CLASS:
2273 case X86_64_X87_CLASS:
2274 case X86_64_X87UP_CLASS:
2278 case X86_64_MEMORY_CLASS:
2283 /* Construct container for the argument used by GCC interface. See
2284 FUNCTION_ARG for the detailed description. */
2286 construct_container (enum machine_mode mode, tree type, int in_return,
2287 int nintregs, int nsseregs, const int * intreg,
2290 enum machine_mode tmpmode;
2292 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2293 enum x86_64_reg_class class[MAX_CLASSES];
2297 int needed_sseregs, needed_intregs;
2298 rtx exp[MAX_CLASSES];
2301 n = classify_argument (mode, type, class, 0);
2302 if (TARGET_DEBUG_ARG)
2305 fprintf (stderr, "Memory class\n");
2308 fprintf (stderr, "Classes:");
2309 for (i = 0; i < n; i++)
2311 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2313 fprintf (stderr, "\n");
2318 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2320 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2323 /* First construct simple cases. Avoid SCmode, since we want to use
2324 single register to pass this type. */
2325 if (n == 1 && mode != SCmode)
2328 case X86_64_INTEGER_CLASS:
2329 case X86_64_INTEGERSI_CLASS:
2330 return gen_rtx_REG (mode, intreg[0]);
2331 case X86_64_SSE_CLASS:
2332 case X86_64_SSESF_CLASS:
2333 case X86_64_SSEDF_CLASS:
2334 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2335 case X86_64_X87_CLASS:
2336 return gen_rtx_REG (mode, FIRST_STACK_REG);
2337 case X86_64_NO_CLASS:
2338 /* Zero sized array, struct or class. */
2343 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2344 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2346 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2347 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2348 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2349 && class[1] == X86_64_INTEGER_CLASS
2350 && (mode == CDImode || mode == TImode || mode == TFmode)
2351 && intreg[0] + 1 == intreg[1])
2352 return gen_rtx_REG (mode, intreg[0]);
2354 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2355 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2356 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2358 /* Otherwise figure out the entries of the PARALLEL. */
2359 for (i = 0; i < n; i++)
2363 case X86_64_NO_CLASS:
2365 case X86_64_INTEGER_CLASS:
2366 case X86_64_INTEGERSI_CLASS:
2367 /* Merge TImodes on aligned occasions here too. */
2368 if (i * 8 + 8 > bytes)
2369 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2370 else if (class[i] == X86_64_INTEGERSI_CLASS)
2374 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2375 if (tmpmode == BLKmode)
2377 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2378 gen_rtx_REG (tmpmode, *intreg),
2382 case X86_64_SSESF_CLASS:
2383 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2384 gen_rtx_REG (SFmode,
2385 SSE_REGNO (sse_regno)),
2389 case X86_64_SSEDF_CLASS:
2390 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2391 gen_rtx_REG (DFmode,
2392 SSE_REGNO (sse_regno)),
2396 case X86_64_SSE_CLASS:
2397 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2401 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2402 gen_rtx_REG (tmpmode,
2403 SSE_REGNO (sse_regno)),
2405 if (tmpmode == TImode)
2413 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2414 for (i = 0; i < nexps; i++)
2415 XVECEXP (ret, 0, i) = exp [i];
2419 /* Update the data in CUM to advance over an argument
2420 of mode MODE and data type TYPE.
2421 (TYPE is null for libcalls where that information may not be available.) */
2424 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2425 enum machine_mode mode, /* current arg mode */
2426 tree type, /* type of the argument or 0 if lib support */
2427 int named) /* whether or not the argument was named */
2430 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2431 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2433 if (TARGET_DEBUG_ARG)
2435 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2436 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2439 int int_nregs, sse_nregs;
2440 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2441 cum->words += words;
2442 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2444 cum->nregs -= int_nregs;
2445 cum->sse_nregs -= sse_nregs;
2446 cum->regno += int_nregs;
2447 cum->sse_regno += sse_nregs;
2450 cum->words += words;
2454 if (TARGET_SSE && mode == TImode)
2456 cum->sse_words += words;
2457 cum->sse_nregs -= 1;
2458 cum->sse_regno += 1;
2459 if (cum->sse_nregs <= 0)
2467 cum->words += words;
2468 cum->nregs -= words;
2469 cum->regno += words;
2471 if (cum->nregs <= 0)
2481 /* Define where to put the arguments to a function.
2482 Value is zero to push the argument on the stack,
2483 or a hard register in which to store the argument.
2485 MODE is the argument's machine mode.
2486 TYPE is the data type of the argument (as a tree).
2487 This is null for libcalls where that information may
2489 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2490 the preceding args and about the function being called.
2491 NAMED is nonzero if this argument is a named parameter
2492 (otherwise it is an extra parameter matching an ellipsis). */
2495 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2496 enum machine_mode mode, /* current arg mode */
2497 tree type, /* type of the argument or 0 if lib support */
2498 int named) /* != 0 for normal args, == 0 for ... args */
2502 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2503 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2505 /* Handle a hidden AL argument containing number of registers for varargs
2506 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2508 if (mode == VOIDmode)
2511 return GEN_INT (cum->maybe_vaarg
2512 ? (cum->sse_nregs < 0
2520 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2521 &x86_64_int_parameter_registers [cum->regno],
2526 /* For now, pass fp/complex values on the stack. */
2538 if (words <= cum->nregs)
2540 int regno = cum->regno;
2542 /* Fastcall allocates the first two DWORD (SImode) or
2543 smaller arguments to ECX and EDX. */
2546 if (mode == BLKmode || mode == DImode)
2549 /* ECX not EAX is the first allocated register. */
2553 ret = gen_rtx_REG (mode, regno);
2558 ret = gen_rtx_REG (mode, cum->sse_regno);
2562 if (TARGET_DEBUG_ARG)
2565 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2566 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2569 print_simple_rtl (stderr, ret);
2571 fprintf (stderr, ", stack");
2573 fprintf (stderr, " )\n");
2579 /* A C expression that indicates when an argument must be passed by
2580 reference. If nonzero for an argument, a copy of that argument is
2581 made in memory and a pointer to the argument is passed instead of
2582 the argument itself. The pointer is passed in whatever way is
2583 appropriate for passing a pointer to that type. */
2586 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2587 enum machine_mode mode ATTRIBUTE_UNUSED,
2588 tree type, int named ATTRIBUTE_UNUSED)
2593 if (type && int_size_in_bytes (type) == -1)
2595 if (TARGET_DEBUG_ARG)
2596 fprintf (stderr, "function_arg_pass_by_reference\n");
2603 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2606 contains_128bit_aligned_vector_p (tree type)
2608 enum machine_mode mode = TYPE_MODE (type);
2609 if (SSE_REG_MODE_P (mode)
2610 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2612 if (TYPE_ALIGN (type) < 128)
2615 if (AGGREGATE_TYPE_P (type))
2617 /* Walk the aggregates recursively. */
2618 if (TREE_CODE (type) == RECORD_TYPE
2619 || TREE_CODE (type) == UNION_TYPE
2620 || TREE_CODE (type) == QUAL_UNION_TYPE)
2624 if (TYPE_BINFO (type) != NULL
2625 && TYPE_BINFO_BASETYPES (type) != NULL)
2627 tree bases = TYPE_BINFO_BASETYPES (type);
2628 int n_bases = TREE_VEC_LENGTH (bases);
2631 for (i = 0; i < n_bases; ++i)
2633 tree binfo = TREE_VEC_ELT (bases, i);
2634 tree type = BINFO_TYPE (binfo);
2636 if (contains_128bit_aligned_vector_p (type))
2640 /* And now merge the fields of structure. */
2641 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2643 if (TREE_CODE (field) == FIELD_DECL
2644 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2648 /* Just for use if some languages passes arrays by value. */
2649 else if (TREE_CODE (type) == ARRAY_TYPE)
2651 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2660 /* Gives the alignment boundary, in bits, of an argument with the
2661 specified mode and type. */
2664 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2668 align = TYPE_ALIGN (type);
2670 align = GET_MODE_ALIGNMENT (mode);
2671 if (align < PARM_BOUNDARY)
2672 align = PARM_BOUNDARY;
2675 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2676 make an exception for SSE modes since these require 128bit
2679 The handling here differs from field_alignment. ICC aligns MMX
2680 arguments to 4 byte boundaries, while structure fields are aligned
2681 to 8 byte boundaries. */
2684 if (!SSE_REG_MODE_P (mode))
2685 align = PARM_BOUNDARY;
2689 if (!contains_128bit_aligned_vector_p (type))
2690 align = PARM_BOUNDARY;
2698 /* Return true if N is a possible register number of function value. */
2700 ix86_function_value_regno_p (int regno)
2704 return ((regno) == 0
2705 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2706 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2708 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2709 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2710 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2713 /* Define how to find the value returned by a function.
2714 VALTYPE is the data type of the value (as a tree).
2715 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2716 otherwise, FUNC is 0. */
2718 ix86_function_value (tree valtype)
2722 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2723 REGPARM_MAX, SSE_REGPARM_MAX,
2724 x86_64_int_return_registers, 0);
2725 /* For zero sized structures, construct_container return NULL, but we need
2726 to keep rest of compiler happy by returning meaningful value. */
2728 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2732 return gen_rtx_REG (TYPE_MODE (valtype),
2733 ix86_value_regno (TYPE_MODE (valtype)));
2736 /* Return false iff type is returned in memory. */
2738 ix86_return_in_memory (tree type)
2740 int needed_intregs, needed_sseregs, size;
2741 enum machine_mode mode = TYPE_MODE (type);
2744 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2746 if (mode == BLKmode)
2749 size = int_size_in_bytes (type);
2751 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2754 if (VECTOR_MODE_P (mode) || mode == TImode)
2756 /* User-created vectors small enough to fit in EAX. */
2760 /* MMX/3dNow values are returned on the stack, since we've
2761 got to EMMS/FEMMS before returning. */
2765 /* SSE values are returned in XMM0. */
2766 /* ??? Except when it doesn't exist? We have a choice of
2767 either (1) being abi incompatible with a -march switch,
2768 or (2) generating an error here. Given no good solution,
2769 I think the safest thing is one warning. The user won't
2770 be able to use -Werror, but... */
2781 warning ("SSE vector return without SSE enabled "
2788 if (mode == TFmode || mode == XFmode)
2796 /* Define how to find the value returned by a library function
2797 assuming the value has mode MODE. */
2799 ix86_libcall_value (enum machine_mode mode)
2809 return gen_rtx_REG (mode, FIRST_SSE_REG);
2812 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2815 rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
2816 XVECEXP (ret, 0, 0) = gen_rtx_EXPR_LIST
2818 gen_rtx_REG (DImode, x86_64_int_parameter_registers [0]),
2820 XVECEXP (ret, 0, 1) = gen_rtx_EXPR_LIST
2822 gen_rtx_REG (DImode, x86_64_int_parameter_registers [1]),
2829 return gen_rtx_REG (mode, 0);
2833 return gen_rtx_REG (mode, ix86_value_regno (mode));
2836 /* Given a mode, return the register to use for a return value. */
2839 ix86_value_regno (enum machine_mode mode)
2841 /* Floating point return values in %st(0). */
2842 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2843 return FIRST_FLOAT_REG;
2844 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2845 we prevent this case when sse is not available. */
2846 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2847 return FIRST_SSE_REG;
2848 /* Everything else in %eax. */
2852 /* Create the va_list data type. */
2855 ix86_build_builtin_va_list (void)
2857 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2859 /* For i386 we use plain pointer to argument area. */
2861 return build_pointer_type (char_type_node);
2863 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2864 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2866 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2867 unsigned_type_node);
2868 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2869 unsigned_type_node);
2870 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2872 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2875 DECL_FIELD_CONTEXT (f_gpr) = record;
2876 DECL_FIELD_CONTEXT (f_fpr) = record;
2877 DECL_FIELD_CONTEXT (f_ovf) = record;
2878 DECL_FIELD_CONTEXT (f_sav) = record;
2880 TREE_CHAIN (record) = type_decl;
2881 TYPE_NAME (record) = type_decl;
2882 TYPE_FIELDS (record) = f_gpr;
2883 TREE_CHAIN (f_gpr) = f_fpr;
2884 TREE_CHAIN (f_fpr) = f_ovf;
2885 TREE_CHAIN (f_ovf) = f_sav;
2887 layout_type (record);
2889 /* The correct type is an array type of one element. */
2890 return build_array_type (record, build_index_type (size_zero_node));
2893 /* Perform any needed actions needed for a function that is receiving a
2894 variable number of arguments.
2898 MODE and TYPE are the mode and type of the current parameter.
2900 PRETEND_SIZE is a variable that should be set to the amount of stack
2901 that must be pushed by the prolog to pretend that our caller pushed
2904 Normally, this macro will push all remaining incoming registers on the
2905 stack and set PRETEND_SIZE to the length of the registers pushed. */
2908 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2909 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2912 CUMULATIVE_ARGS next_cum;
2913 rtx save_area = NULL_RTX, mem;
2926 /* Indicate to allocate space on the stack for varargs save area. */
2927 ix86_save_varrargs_registers = 1;
2929 cfun->stack_alignment_needed = 128;
2931 fntype = TREE_TYPE (current_function_decl);
2932 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2933 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2934 != void_type_node));
2936 /* For varargs, we do not want to skip the dummy va_dcl argument.
2937 For stdargs, we do want to skip the last named argument. */
2940 function_arg_advance (&next_cum, mode, type, 1);
2943 save_area = frame_pointer_rtx;
2945 set = get_varargs_alias_set ();
2947 for (i = next_cum.regno; i < ix86_regparm; i++)
2949 mem = gen_rtx_MEM (Pmode,
2950 plus_constant (save_area, i * UNITS_PER_WORD));
2951 set_mem_alias_set (mem, set);
2952 emit_move_insn (mem, gen_rtx_REG (Pmode,
2953 x86_64_int_parameter_registers[i]));
2956 if (next_cum.sse_nregs)
2958 /* Now emit code to save SSE registers. The AX parameter contains number
2959 of SSE parameter registers used to call this function. We use
2960 sse_prologue_save insn template that produces computed jump across
2961 SSE saves. We need some preparation work to get this working. */
2963 label = gen_label_rtx ();
2964 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2966 /* Compute address to jump to :
2967 label - 5*eax + nnamed_sse_arguments*5 */
2968 tmp_reg = gen_reg_rtx (Pmode);
2969 nsse_reg = gen_reg_rtx (Pmode);
2970 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2971 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2972 gen_rtx_MULT (Pmode, nsse_reg,
2974 if (next_cum.sse_regno)
2977 gen_rtx_CONST (DImode,
2978 gen_rtx_PLUS (DImode,
2980 GEN_INT (next_cum.sse_regno * 4))));
2982 emit_move_insn (nsse_reg, label_ref);
2983 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2985 /* Compute address of memory block we save into. We always use pointer
2986 pointing 127 bytes after first byte to store - this is needed to keep
2987 instruction size limited by 4 bytes. */
2988 tmp_reg = gen_reg_rtx (Pmode);
2989 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2990 plus_constant (save_area,
2991 8 * REGPARM_MAX + 127)));
2992 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2993 set_mem_alias_set (mem, set);
2994 set_mem_align (mem, BITS_PER_WORD);
2996 /* And finally do the dirty job! */
2997 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2998 GEN_INT (next_cum.sse_regno), label));
3003 /* Implement va_start. */
3006 ix86_va_start (tree valist, rtx nextarg)
3008 HOST_WIDE_INT words, n_gpr, n_fpr;
3009 tree f_gpr, f_fpr, f_ovf, f_sav;
3010 tree gpr, fpr, ovf, sav, t;
3012 /* Only 64bit target needs something special. */
3015 std_expand_builtin_va_start (valist, nextarg);
3019 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3020 f_fpr = TREE_CHAIN (f_gpr);
3021 f_ovf = TREE_CHAIN (f_fpr);
3022 f_sav = TREE_CHAIN (f_ovf);
3024 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3025 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3026 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3027 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3028 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3030 /* Count number of gp and fp argument registers used. */
3031 words = current_function_args_info.words;
3032 n_gpr = current_function_args_info.regno;
3033 n_fpr = current_function_args_info.sse_regno;
3035 if (TARGET_DEBUG_ARG)
3036 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3037 (int) words, (int) n_gpr, (int) n_fpr);
3039 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3040 build_int_2 (n_gpr * 8, 0));
3041 TREE_SIDE_EFFECTS (t) = 1;
3042 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3044 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3045 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3046 TREE_SIDE_EFFECTS (t) = 1;
3047 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3049 /* Find the overflow area. */
3050 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3052 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3053 build_int_2 (words * UNITS_PER_WORD, 0));
3054 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3055 TREE_SIDE_EFFECTS (t) = 1;
3056 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3058 /* Find the register save area.
3059 Prologue of the function save it right above stack frame. */
3060 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3061 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3062 TREE_SIDE_EFFECTS (t) = 1;
3063 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3066 /* Implement va_arg. */
3068 ix86_va_arg (tree valist, tree type)
3070 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3071 tree f_gpr, f_fpr, f_ovf, f_sav;
3072 tree gpr, fpr, ovf, sav, t;
3074 rtx lab_false, lab_over = NULL_RTX;
3079 /* Only 64bit target needs something special. */
3082 return std_expand_builtin_va_arg (valist, type);
3085 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3086 f_fpr = TREE_CHAIN (f_gpr);
3087 f_ovf = TREE_CHAIN (f_fpr);
3088 f_sav = TREE_CHAIN (f_ovf);
3090 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3091 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3092 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3093 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3094 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3096 size = int_size_in_bytes (type);
3099 /* Passed by reference. */
3101 type = build_pointer_type (type);
3102 size = int_size_in_bytes (type);
3104 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3106 container = construct_container (TYPE_MODE (type), type, 0,
3107 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3109 * Pull the value out of the saved registers ...
3112 addr_rtx = gen_reg_rtx (Pmode);
3116 rtx int_addr_rtx, sse_addr_rtx;
3117 int needed_intregs, needed_sseregs;
3120 lab_over = gen_label_rtx ();
3121 lab_false = gen_label_rtx ();
3123 examine_argument (TYPE_MODE (type), type, 0,
3124 &needed_intregs, &needed_sseregs);
3127 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3128 || TYPE_ALIGN (type) > 128);
3130 /* In case we are passing structure, verify that it is consecutive block
3131 on the register save area. If not we need to do moves. */
3132 if (!need_temp && !REG_P (container))
3134 /* Verify that all registers are strictly consecutive */
3135 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3139 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3141 rtx slot = XVECEXP (container, 0, i);
3142 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3143 || INTVAL (XEXP (slot, 1)) != i * 16)
3151 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3153 rtx slot = XVECEXP (container, 0, i);
3154 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3155 || INTVAL (XEXP (slot, 1)) != i * 8)
3162 int_addr_rtx = addr_rtx;
3163 sse_addr_rtx = addr_rtx;
3167 int_addr_rtx = gen_reg_rtx (Pmode);
3168 sse_addr_rtx = gen_reg_rtx (Pmode);
3170 /* First ensure that we fit completely in registers. */
3173 emit_cmp_and_jump_insns (expand_expr
3174 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3175 GEN_INT ((REGPARM_MAX - needed_intregs +
3176 1) * 8), GE, const1_rtx, SImode,
3181 emit_cmp_and_jump_insns (expand_expr
3182 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3183 GEN_INT ((SSE_REGPARM_MAX -
3184 needed_sseregs + 1) * 16 +
3185 REGPARM_MAX * 8), GE, const1_rtx,
3186 SImode, 1, lab_false);
3189 /* Compute index to start of area used for integer regs. */
3192 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3193 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3194 if (r != int_addr_rtx)
3195 emit_move_insn (int_addr_rtx, r);
3199 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3200 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3201 if (r != sse_addr_rtx)
3202 emit_move_insn (sse_addr_rtx, r);
3210 /* Never use the memory itself, as it has the alias set. */
3211 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3212 mem = gen_rtx_MEM (BLKmode, x);
3213 force_operand (x, addr_rtx);
3214 set_mem_alias_set (mem, get_varargs_alias_set ());
3215 set_mem_align (mem, BITS_PER_UNIT);
3217 for (i = 0; i < XVECLEN (container, 0); i++)
3219 rtx slot = XVECEXP (container, 0, i);
3220 rtx reg = XEXP (slot, 0);
3221 enum machine_mode mode = GET_MODE (reg);
3227 if (SSE_REGNO_P (REGNO (reg)))
3229 src_addr = sse_addr_rtx;
3230 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3234 src_addr = int_addr_rtx;
3235 src_offset = REGNO (reg) * 8;
3237 src_mem = gen_rtx_MEM (mode, src_addr);
3238 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3239 src_mem = adjust_address (src_mem, mode, src_offset);
3240 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3241 emit_move_insn (dest_mem, src_mem);
3248 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3249 build_int_2 (needed_intregs * 8, 0));
3250 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3251 TREE_SIDE_EFFECTS (t) = 1;
3252 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3257 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3258 build_int_2 (needed_sseregs * 16, 0));
3259 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3260 TREE_SIDE_EFFECTS (t) = 1;
3261 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3264 emit_jump_insn (gen_jump (lab_over));
3266 emit_label (lab_false);
3269 /* ... otherwise out of the overflow area. */
3271 /* Care for on-stack alignment if needed. */
3272 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3276 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3277 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3278 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3282 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3284 emit_move_insn (addr_rtx, r);
3287 build (PLUS_EXPR, TREE_TYPE (t), t,
3288 build_int_2 (rsize * UNITS_PER_WORD, 0));
3289 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3290 TREE_SIDE_EFFECTS (t) = 1;
3291 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3294 emit_label (lab_over);
3298 r = gen_rtx_MEM (Pmode, addr_rtx);
3299 set_mem_alias_set (r, get_varargs_alias_set ());
3300 emit_move_insn (addr_rtx, r);
3306 /* Return nonzero if OP is either a i387 or SSE fp register. */
3308 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3310 return ANY_FP_REG_P (op);
3313 /* Return nonzero if OP is an i387 fp register. */
3315 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3317 return FP_REG_P (op);
3320 /* Return nonzero if OP is a non-fp register_operand. */
3322 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3324 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3327 /* Return nonzero if OP is a register operand other than an
3328 i387 fp register. */
3330 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3332 return register_operand (op, mode) && !FP_REG_P (op);
3335 /* Return nonzero if OP is general operand representable on x86_64. */
3338 x86_64_general_operand (rtx op, enum machine_mode mode)
3341 return general_operand (op, mode);
3342 if (nonimmediate_operand (op, mode))
3344 return x86_64_sign_extended_value (op);
3347 /* Return nonzero if OP is general operand representable on x86_64
3348 as either sign extended or zero extended constant. */
3351 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3354 return general_operand (op, mode);
3355 if (nonimmediate_operand (op, mode))
3357 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3360 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3363 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3366 return nonmemory_operand (op, mode);
3367 if (register_operand (op, mode))
3369 return x86_64_sign_extended_value (op);
3372 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3375 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3377 if (!TARGET_64BIT || !flag_pic)
3378 return nonmemory_operand (op, mode);
3379 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3381 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3386 /* Return nonzero if OPNUM's MEM should be matched
3387 in movabs* patterns. */
3390 ix86_check_movabs (rtx insn, int opnum)
3394 set = PATTERN (insn);
3395 if (GET_CODE (set) == PARALLEL)
3396 set = XVECEXP (set, 0, 0);
3397 if (GET_CODE (set) != SET)
3399 mem = XEXP (set, opnum);
3400 while (GET_CODE (mem) == SUBREG)
3401 mem = SUBREG_REG (mem);
3402 if (GET_CODE (mem) != MEM)
3404 return (volatile_ok || !MEM_VOLATILE_P (mem));
3407 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3410 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3413 return nonmemory_operand (op, mode);
3414 if (register_operand (op, mode))
3416 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3419 /* Return nonzero if OP is immediate operand representable on x86_64. */
3422 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3425 return immediate_operand (op, mode);
3426 return x86_64_sign_extended_value (op);
3429 /* Return nonzero if OP is immediate operand representable on x86_64. */
3432 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3434 return x86_64_zero_extended_value (op);
3437 /* Return nonzero if OP is (const_int 1), else return zero. */
3440 const_int_1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3442 return op == const1_rtx;
3445 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3446 for shift & compare patterns, as shifting by 0 does not change flags),
3447 else return zero. */
3450 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3452 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3455 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3456 reference and a constant. */
3459 symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3461 switch (GET_CODE (op))
3469 if (GET_CODE (op) == SYMBOL_REF
3470 || GET_CODE (op) == LABEL_REF
3471 || (GET_CODE (op) == UNSPEC
3472 && (XINT (op, 1) == UNSPEC_GOT
3473 || XINT (op, 1) == UNSPEC_GOTOFF
3474 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3476 if (GET_CODE (op) != PLUS
3477 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3481 if (GET_CODE (op) == SYMBOL_REF
3482 || GET_CODE (op) == LABEL_REF)
3484 /* Only @GOTOFF gets offsets. */
3485 if (GET_CODE (op) != UNSPEC
3486 || XINT (op, 1) != UNSPEC_GOTOFF)
3489 op = XVECEXP (op, 0, 0);
3490 if (GET_CODE (op) == SYMBOL_REF
3491 || GET_CODE (op) == LABEL_REF)
3500 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3503 pic_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3505 if (GET_CODE (op) != CONST)
3510 if (GET_CODE (op) == UNSPEC
3511 && XINT (op, 1) == UNSPEC_GOTPCREL)
3513 if (GET_CODE (op) == PLUS
3514 && GET_CODE (XEXP (op, 0)) == UNSPEC
3515 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3520 if (GET_CODE (op) == UNSPEC)
3522 if (GET_CODE (op) != PLUS
3523 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3526 if (GET_CODE (op) == UNSPEC)
3532 /* Return true if OP is a symbolic operand that resolves locally. */
3535 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3537 if (GET_CODE (op) == CONST
3538 && GET_CODE (XEXP (op, 0)) == PLUS
3539 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3540 op = XEXP (XEXP (op, 0), 0);
3542 if (GET_CODE (op) == LABEL_REF)
3545 if (GET_CODE (op) != SYMBOL_REF)
3548 if (SYMBOL_REF_LOCAL_P (op))
3551 /* There is, however, a not insubstantial body of code in the rest of
3552 the compiler that assumes it can just stick the results of
3553 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3554 /* ??? This is a hack. Should update the body of the compiler to
3555 always create a DECL an invoke targetm.encode_section_info. */
3556 if (strncmp (XSTR (op, 0), internal_label_prefix,
3557 internal_label_prefix_len) == 0)
3563 /* Test for various thread-local symbols. */
3566 tls_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3568 if (GET_CODE (op) != SYMBOL_REF)
3570 return SYMBOL_REF_TLS_MODEL (op);
3574 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3576 if (GET_CODE (op) != SYMBOL_REF)
3578 return SYMBOL_REF_TLS_MODEL (op) == kind;
3582 global_dynamic_symbolic_operand (register rtx op,
3583 enum machine_mode mode ATTRIBUTE_UNUSED)
3585 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3589 local_dynamic_symbolic_operand (register rtx op,
3590 enum machine_mode mode ATTRIBUTE_UNUSED)
3592 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3596 initial_exec_symbolic_operand (register rtx op,
3597 enum machine_mode mode ATTRIBUTE_UNUSED)
3599 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3603 local_exec_symbolic_operand (register rtx op,
3604 enum machine_mode mode ATTRIBUTE_UNUSED)
3606 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3609 /* Test for a valid operand for a call instruction. Don't allow the
3610 arg pointer register or virtual regs since they may decay into
3611 reg + const, which the patterns can't handle. */
3614 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3616 /* Disallow indirect through a virtual register. This leads to
3617 compiler aborts when trying to eliminate them. */
3618 if (GET_CODE (op) == REG
3619 && (op == arg_pointer_rtx
3620 || op == frame_pointer_rtx
3621 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3622 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3625 /* Disallow `call 1234'. Due to varying assembler lameness this
3626 gets either rejected or translated to `call .+1234'. */
3627 if (GET_CODE (op) == CONST_INT)
3630 /* Explicitly allow SYMBOL_REF even if pic. */
3631 if (GET_CODE (op) == SYMBOL_REF)
3634 /* Otherwise we can allow any general_operand in the address. */
3635 return general_operand (op, Pmode);
3638 /* Test for a valid operand for a call instruction. Don't allow the
3639 arg pointer register or virtual regs since they may decay into
3640 reg + const, which the patterns can't handle. */
3643 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3645 /* Disallow indirect through a virtual register. This leads to
3646 compiler aborts when trying to eliminate them. */
3647 if (GET_CODE (op) == REG
3648 && (op == arg_pointer_rtx
3649 || op == frame_pointer_rtx
3650 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3651 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3654 /* Explicitly allow SYMBOL_REF even if pic. */
3655 if (GET_CODE (op) == SYMBOL_REF)
3658 /* Otherwise we can only allow register operands. */
3659 return register_operand (op, Pmode);
3663 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3665 if (GET_CODE (op) == CONST
3666 && GET_CODE (XEXP (op, 0)) == PLUS
3667 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3668 op = XEXP (XEXP (op, 0), 0);
3669 return GET_CODE (op) == SYMBOL_REF;
3672 /* Match exactly zero and one. */
3675 const0_operand (register rtx op, enum machine_mode mode)
3677 return op == CONST0_RTX (mode);
3681 const1_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3683 return op == const1_rtx;
3686 /* Match 2, 4, or 8. Used for leal multiplicands. */
3689 const248_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3691 return (GET_CODE (op) == CONST_INT
3692 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3696 const_0_to_3_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3698 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3702 const_0_to_7_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3704 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3708 const_0_to_15_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3710 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3714 const_0_to_255_operand (register rtx op,
3715 enum machine_mode mode ATTRIBUTE_UNUSED)
3717 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3721 /* True if this is a constant appropriate for an increment or decrement. */
3724 incdec_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3726 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3727 registers, since carry flag is not set. */
3728 if (TARGET_PENTIUM4 && !optimize_size)
3730 return op == const1_rtx || op == constm1_rtx;
3733 /* Return nonzero if OP is acceptable as operand of DImode shift
3737 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3740 return nonimmediate_operand (op, mode);
3742 return register_operand (op, mode);
3745 /* Return false if this is the stack pointer, or any other fake
3746 register eliminable to the stack pointer. Otherwise, this is
3749 This is used to prevent esp from being used as an index reg.
3750 Which would only happen in pathological cases. */
3753 reg_no_sp_operand (register rtx op, enum machine_mode mode)
3756 if (GET_CODE (t) == SUBREG)
3758 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3761 return register_operand (op, mode);
3765 mmx_reg_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3767 return MMX_REG_P (op);
3770 /* Return false if this is any eliminable register. Otherwise
3774 general_no_elim_operand (register rtx op, enum machine_mode mode)
3777 if (GET_CODE (t) == SUBREG)
3779 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3780 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3781 || t == virtual_stack_dynamic_rtx)
3784 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3785 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3788 return general_operand (op, mode);
3791 /* Return false if this is any eliminable register. Otherwise
3792 register_operand or const_int. */
3795 nonmemory_no_elim_operand (register rtx op, enum machine_mode mode)
3798 if (GET_CODE (t) == SUBREG)
3800 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3801 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3802 || t == virtual_stack_dynamic_rtx)
3805 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3808 /* Return false if this is any eliminable register or stack register,
3809 otherwise work like register_operand. */
3812 index_register_operand (register rtx op, enum machine_mode mode)
3815 if (GET_CODE (t) == SUBREG)
3819 if (t == arg_pointer_rtx
3820 || t == frame_pointer_rtx
3821 || t == virtual_incoming_args_rtx
3822 || t == virtual_stack_vars_rtx
3823 || t == virtual_stack_dynamic_rtx
3824 || REGNO (t) == STACK_POINTER_REGNUM)
3827 return general_operand (op, mode);
3830 /* Return true if op is a Q_REGS class register. */
3833 q_regs_operand (register rtx op, enum machine_mode mode)
3835 if (mode != VOIDmode && GET_MODE (op) != mode)
3837 if (GET_CODE (op) == SUBREG)
3838 op = SUBREG_REG (op);
3839 return ANY_QI_REG_P (op);
3842 /* Return true if op is an flags register. */
3845 flags_reg_operand (register rtx op, enum machine_mode mode)
3847 if (mode != VOIDmode && GET_MODE (op) != mode)
3849 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3852 /* Return true if op is a NON_Q_REGS class register. */
3855 non_q_regs_operand (register rtx op, enum machine_mode mode)
3857 if (mode != VOIDmode && GET_MODE (op) != mode)
3859 if (GET_CODE (op) == SUBREG)
3860 op = SUBREG_REG (op);
3861 return NON_QI_REG_P (op);
3865 zero_extended_scalar_load_operand (rtx op,
3866 enum machine_mode mode ATTRIBUTE_UNUSED)
3869 if (GET_CODE (op) != MEM)
3871 op = maybe_get_pool_constant (op);
3874 if (GET_CODE (op) != CONST_VECTOR)
3877 (GET_MODE_SIZE (GET_MODE (op)) /
3878 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3879 for (n_elts--; n_elts > 0; n_elts--)
3881 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3882 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3888 /* Return 1 when OP is operand acceptable for standard SSE move. */
3890 vector_move_operand (rtx op, enum machine_mode mode)
3892 if (nonimmediate_operand (op, mode))
3894 if (GET_MODE (op) != mode && mode != VOIDmode)
3896 return (op == CONST0_RTX (GET_MODE (op)));
3899 /* Return true if op if a valid address, and does not contain
3900 a segment override. */
3903 no_seg_address_operand (register rtx op, enum machine_mode mode)
3905 struct ix86_address parts;
3907 if (! address_operand (op, mode))
3910 if (! ix86_decompose_address (op, &parts))
3913 return parts.seg == SEG_DEFAULT;
3916 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3919 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3921 enum rtx_code code = GET_CODE (op);
3924 /* Operations supported directly. */
3934 /* These are equivalent to ones above in non-IEEE comparisons. */
3941 return !TARGET_IEEE_FP;
3946 /* Return 1 if OP is a valid comparison operator in valid mode. */
3948 ix86_comparison_operator (register rtx op, enum machine_mode mode)
3950 enum machine_mode inmode;
3951 enum rtx_code code = GET_CODE (op);
3952 if (mode != VOIDmode && GET_MODE (op) != mode)
3954 if (GET_RTX_CLASS (code) != '<')
3956 inmode = GET_MODE (XEXP (op, 0));
3958 if (inmode == CCFPmode || inmode == CCFPUmode)
3960 enum rtx_code second_code, bypass_code;
3961 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3962 return (bypass_code == NIL && second_code == NIL);
3969 if (inmode == CCmode || inmode == CCGCmode
3970 || inmode == CCGOCmode || inmode == CCNOmode)
3973 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3974 if (inmode == CCmode)
3978 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3986 /* Return 1 if OP is a valid comparison operator testing carry flag
3989 ix86_carry_flag_operator (register rtx op, enum machine_mode mode)
3991 enum machine_mode inmode;
3992 enum rtx_code code = GET_CODE (op);
3994 if (mode != VOIDmode && GET_MODE (op) != mode)
3996 if (GET_RTX_CLASS (code) != '<')
3998 inmode = GET_MODE (XEXP (op, 0));
3999 if (GET_CODE (XEXP (op, 0)) != REG
4000 || REGNO (XEXP (op, 0)) != 17
4001 || XEXP (op, 1) != const0_rtx)
4004 if (inmode == CCFPmode || inmode == CCFPUmode)
4006 enum rtx_code second_code, bypass_code;
4008 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4009 if (bypass_code != NIL || second_code != NIL)
4011 code = ix86_fp_compare_code_to_integer (code);
4013 else if (inmode != CCmode)
4018 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4021 fcmov_comparison_operator (register rtx op, enum machine_mode mode)
4023 enum machine_mode inmode;
4024 enum rtx_code code = GET_CODE (op);
4026 if (mode != VOIDmode && GET_MODE (op) != mode)
4028 if (GET_RTX_CLASS (code) != '<')
4030 inmode = GET_MODE (XEXP (op, 0));
4031 if (inmode == CCFPmode || inmode == CCFPUmode)
4033 enum rtx_code second_code, bypass_code;
4035 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4036 if (bypass_code != NIL || second_code != NIL)
4038 code = ix86_fp_compare_code_to_integer (code);
4040 /* i387 supports just limited amount of conditional codes. */
4043 case LTU: case GTU: case LEU: case GEU:
4044 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4047 case ORDERED: case UNORDERED:
4055 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4058 promotable_binary_operator (register rtx op,
4059 enum machine_mode mode ATTRIBUTE_UNUSED)
4061 switch (GET_CODE (op))
4064 /* Modern CPUs have same latency for HImode and SImode multiply,
4065 but 386 and 486 do HImode multiply faster. */
4066 return ix86_tune > PROCESSOR_I486;
4078 /* Nearly general operand, but accept any const_double, since we wish
4079 to be able to drop them into memory rather than have them get pulled
4083 cmp_fp_expander_operand (register rtx op, enum machine_mode mode)
4085 if (mode != VOIDmode && mode != GET_MODE (op))
4087 if (GET_CODE (op) == CONST_DOUBLE)
4089 return general_operand (op, mode);
4092 /* Match an SI or HImode register for a zero_extract. */
4095 ext_register_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4098 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4099 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4102 if (!register_operand (op, VOIDmode))
4105 /* Be careful to accept only registers having upper parts. */
4106 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4107 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4110 /* Return 1 if this is a valid binary floating-point operation.
4111 OP is the expression matched, and MODE is its mode. */
4114 binary_fp_operator (register rtx op, enum machine_mode mode)
4116 if (mode != VOIDmode && mode != GET_MODE (op))
4119 switch (GET_CODE (op))
4125 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4133 mult_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4135 return GET_CODE (op) == MULT;
4139 div_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4141 return GET_CODE (op) == DIV;
4145 arith_or_logical_operator (rtx op, enum machine_mode mode)
4147 return ((mode == VOIDmode || GET_MODE (op) == mode)
4148 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4149 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4152 /* Returns 1 if OP is memory operand with a displacement. */
4155 memory_displacement_operand (register rtx op, enum machine_mode mode)
4157 struct ix86_address parts;
4159 if (! memory_operand (op, mode))
4162 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4165 return parts.disp != NULL_RTX;
4168 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4169 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4171 ??? It seems likely that this will only work because cmpsi is an
4172 expander, and no actual insns use this. */
4175 cmpsi_operand (rtx op, enum machine_mode mode)
4177 if (nonimmediate_operand (op, mode))
4180 if (GET_CODE (op) == AND
4181 && GET_MODE (op) == SImode
4182 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4183 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4184 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4185 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4186 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4187 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4193 /* Returns 1 if OP is memory operand that can not be represented by the
4197 long_memory_operand (register rtx op, enum machine_mode mode)
4199 if (! memory_operand (op, mode))
4202 return memory_address_length (op) != 0;
4205 /* Return nonzero if the rtx is known aligned. */
4208 aligned_operand (rtx op, enum machine_mode mode)
4210 struct ix86_address parts;
4212 if (!general_operand (op, mode))
4215 /* Registers and immediate operands are always "aligned". */
4216 if (GET_CODE (op) != MEM)
4219 /* Don't even try to do any aligned optimizations with volatiles. */
4220 if (MEM_VOLATILE_P (op))
4225 /* Pushes and pops are only valid on the stack pointer. */
4226 if (GET_CODE (op) == PRE_DEC
4227 || GET_CODE (op) == POST_INC)
4230 /* Decode the address. */
4231 if (! ix86_decompose_address (op, &parts))
4234 if (parts.base && GET_CODE (parts.base) == SUBREG)
4235 parts.base = SUBREG_REG (parts.base);
4236 if (parts.index && GET_CODE (parts.index) == SUBREG)
4237 parts.index = SUBREG_REG (parts.index);
4239 /* Look for some component that isn't known to be aligned. */
4243 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4248 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4253 if (GET_CODE (parts.disp) != CONST_INT
4254 || (INTVAL (parts.disp) & 3) != 0)
4258 /* Didn't find one -- this must be an aligned address. */
4262 /* Initialize the table of extra 80387 mathematical constants. */
4265 init_ext_80387_constants (void)
4267 static const char * cst[5] =
4269 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4270 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4271 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4272 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4273 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4277 for (i = 0; i < 5; i++)
4279 real_from_string (&ext_80387_constants_table[i], cst[i]);
4280 /* Ensure each constant is rounded to XFmode precision. */
4281 real_convert (&ext_80387_constants_table[i],
4282 XFmode, &ext_80387_constants_table[i]);
4285 ext_80387_constants_init = 1;
4288 /* Return true if the constant is something that can be loaded with
4289 a special instruction. */
4292 standard_80387_constant_p (rtx x)
4294 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4297 if (x == CONST0_RTX (GET_MODE (x)))
4299 if (x == CONST1_RTX (GET_MODE (x)))
4302 /* For XFmode constants, try to find a special 80387 instruction on
4303 those CPUs that benefit from them. */
4304 if (GET_MODE (x) == XFmode
4305 && x86_ext_80387_constants & TUNEMASK)
4310 if (! ext_80387_constants_init)
4311 init_ext_80387_constants ();
4313 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4314 for (i = 0; i < 5; i++)
4315 if (real_identical (&r, &ext_80387_constants_table[i]))
4322 /* Return the opcode of the special instruction to be used to load
4326 standard_80387_constant_opcode (rtx x)
4328 switch (standard_80387_constant_p (x))
4348 /* Return the CONST_DOUBLE representing the 80387 constant that is
4349 loaded by the specified special instruction. The argument IDX
4350 matches the return value from standard_80387_constant_p. */
4353 standard_80387_constant_rtx (int idx)
4357 if (! ext_80387_constants_init)
4358 init_ext_80387_constants ();
4374 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4378 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4381 standard_sse_constant_p (rtx x)
4383 if (x == const0_rtx)
4385 return (x == CONST0_RTX (GET_MODE (x)));
4388 /* Returns 1 if OP contains a symbol reference */
4391 symbolic_reference_mentioned_p (rtx op)
4393 register const char *fmt;
4396 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4399 fmt = GET_RTX_FORMAT (GET_CODE (op));
4400 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4406 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4407 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4411 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4418 /* Return 1 if it is appropriate to emit `ret' instructions in the
4419 body of a function. Do this only if the epilogue is simple, needing a
4420 couple of insns. Prior to reloading, we can't tell how many registers
4421 must be saved, so return 0 then. Return 0 if there is no frame
4422 marker to de-allocate.
4424 If NON_SAVING_SETJMP is defined and true, then it is not possible
4425 for the epilogue to be simple, so return 0. This is a special case
4426 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4427 until final, but jump_optimize may need to know sooner if a
4431 ix86_can_use_return_insn_p (void)
4433 struct ix86_frame frame;
4435 #ifdef NON_SAVING_SETJMP
4436 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4440 if (! reload_completed || frame_pointer_needed)
4443 /* Don't allow more than 32 pop, since that's all we can do
4444 with one instruction. */
4445 if (current_function_pops_args
4446 && current_function_args_size >= 32768)
4449 ix86_compute_frame_layout (&frame);
4450 return frame.to_allocate == 0 && frame.nregs == 0;
4453 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4455 x86_64_sign_extended_value (rtx value)
4457 switch (GET_CODE (value))
4459 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4460 to be at least 32 and this all acceptable constants are
4461 represented as CONST_INT. */
4463 if (HOST_BITS_PER_WIDE_INT == 32)
4467 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4468 return trunc_int_for_mode (val, SImode) == val;
4472 /* For certain code models, the symbolic references are known to fit.
4473 in CM_SMALL_PIC model we know it fits if it is local to the shared
4474 library. Don't count TLS SYMBOL_REFs here, since they should fit
4475 only if inside of UNSPEC handled below. */
4477 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4479 /* For certain code models, the code is near as well. */
4481 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4482 || ix86_cmodel == CM_KERNEL);
4484 /* We also may accept the offsetted memory references in certain special
4487 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4488 switch (XINT (XEXP (value, 0), 1))
4490 case UNSPEC_GOTPCREL:
4492 case UNSPEC_GOTNTPOFF:
4498 if (GET_CODE (XEXP (value, 0)) == PLUS)
4500 rtx op1 = XEXP (XEXP (value, 0), 0);
4501 rtx op2 = XEXP (XEXP (value, 0), 1);
4502 HOST_WIDE_INT offset;
4504 if (ix86_cmodel == CM_LARGE)
4506 if (GET_CODE (op2) != CONST_INT)
4508 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4509 switch (GET_CODE (op1))
4512 /* For CM_SMALL assume that latest object is 16MB before
4513 end of 31bits boundary. We may also accept pretty
4514 large negative constants knowing that all objects are
4515 in the positive half of address space. */
4516 if (ix86_cmodel == CM_SMALL
4517 && offset < 16*1024*1024
4518 && trunc_int_for_mode (offset, SImode) == offset)
4520 /* For CM_KERNEL we know that all object resist in the
4521 negative half of 32bits address space. We may not
4522 accept negative offsets, since they may be just off
4523 and we may accept pretty large positive ones. */
4524 if (ix86_cmodel == CM_KERNEL
4526 && trunc_int_for_mode (offset, SImode) == offset)
4530 /* These conditions are similar to SYMBOL_REF ones, just the
4531 constraints for code models differ. */
4532 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4533 && offset < 16*1024*1024
4534 && trunc_int_for_mode (offset, SImode) == offset)
4536 if (ix86_cmodel == CM_KERNEL
4538 && trunc_int_for_mode (offset, SImode) == offset)
4542 switch (XINT (op1, 1))
4547 && trunc_int_for_mode (offset, SImode) == offset)
4561 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4563 x86_64_zero_extended_value (rtx value)
4565 switch (GET_CODE (value))
4568 if (HOST_BITS_PER_WIDE_INT == 32)
4569 return (GET_MODE (value) == VOIDmode
4570 && !CONST_DOUBLE_HIGH (value));
4574 if (HOST_BITS_PER_WIDE_INT == 32)
4575 return INTVAL (value) >= 0;
4577 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4580 /* For certain code models, the symbolic references are known to fit. */
4582 return ix86_cmodel == CM_SMALL;
4584 /* For certain code models, the code is near as well. */
4586 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4588 /* We also may accept the offsetted memory references in certain special
4591 if (GET_CODE (XEXP (value, 0)) == PLUS)
4593 rtx op1 = XEXP (XEXP (value, 0), 0);
4594 rtx op2 = XEXP (XEXP (value, 0), 1);
4596 if (ix86_cmodel == CM_LARGE)
4598 switch (GET_CODE (op1))
4602 /* For small code model we may accept pretty large positive
4603 offsets, since one bit is available for free. Negative
4604 offsets are limited by the size of NULL pointer area
4605 specified by the ABI. */
4606 if (ix86_cmodel == CM_SMALL
4607 && GET_CODE (op2) == CONST_INT
4608 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4609 && (trunc_int_for_mode (INTVAL (op2), SImode)
4612 /* ??? For the kernel, we may accept adjustment of
4613 -0x10000000, since we know that it will just convert
4614 negative address space to positive, but perhaps this
4615 is not worthwhile. */
4618 /* These conditions are similar to SYMBOL_REF ones, just the
4619 constraints for code models differ. */
4620 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4621 && GET_CODE (op2) == CONST_INT
4622 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4623 && (trunc_int_for_mode (INTVAL (op2), SImode)
4637 /* Value should be nonzero if functions must have frame pointers.
4638 Zero means the frame pointer need not be set up (and parms may
4639 be accessed via the stack pointer) in functions that seem suitable. */
/* NOTE(review): the extraction dropped the return statements for each
   condition below; presumably each `if` returns nonzero — confirm against
   the full source. */
4642 ix86_frame_pointer_required (void)
4644 /* If we accessed previous frames, then the generated code expects
4645 to be able to access the saved ebp value in our frame. */
4646 if (cfun->machine->accesses_prev_frame)
4649 /* Several x86 os'es need a frame pointer for other reasons,
4650 usually pertaining to setjmp. */
4651 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4654 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4655 the frame pointer by default. Turn it back on now if we've not
4656 got a leaf function. */
4657 if (TARGET_OMIT_LEAF_FRAME_POINTER
4658 && (!current_function_is_leaf))
/* Profiled functions also keep a frame pointer. */
4661 if (current_function_profile)
4667 /* Record that the current function accesses previous call frames.
   Setting this flag forces ix86_frame_pointer_required to keep a
   frame pointer so the saved ebp chain stays walkable. */
4670 ix86_setup_frame_addresses (void)
4672 cfun->machine->accesses_prev_frame = 1;
4675 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4676 # define USE_HIDDEN_LINKONCE 1
4678 # define USE_HIDDEN_LINKONCE 0
4681 static int pic_labels_used;
4683 /* Fills in the label name that should be used for a pc thunk for
4684 the given register. NAME must have room for the longest generated
   label ("__i686.get_pc_thunk.%s" plus a register name, within 32
   bytes). With hidden-linkonce support a stable, shareable symbol is
   used; otherwise a compiler-local label is generated. */
4687 get_pc_thunk_name (char name[32], unsigned int regno)
4689 if (USE_HIDDEN_LINKONCE)
4690 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4692 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4696 /* This function generates code for -fpic that loads %ebx with
4697 the return address of the caller and then returns. Emitted at
   end-of-file: one thunk per integer register recorded in the
   pic_labels_used bitmask. */
4700 ix86_file_end (void)
4705 for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk was requested. */
4709 if (! ((pic_labels_used >> regno) & 1))
4712 get_pc_thunk_name (name, regno);
4714 if (USE_HIDDEN_LINKONCE)
/* Emit the thunk as a public, one-only, hidden function so multiple
   translation units share a single copy. */
4718 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4720 TREE_PUBLIC (decl) = 1;
4721 TREE_STATIC (decl) = 1;
4722 DECL_ONE_ONLY (decl) = 1;
4724 (*targetm.asm_out.unique_section) (decl, 0);
4725 named_section (decl, NULL, 0);
4727 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4728 fputs ("\t.hidden\t", asm_out_file);
4729 assemble_name (asm_out_file, name);
4730 fputc ('\n', asm_out_file);
4731 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4736 ASM_OUTPUT_LABEL (asm_out_file, name);
/* The thunk body: load the return address (at the top of the stack
   after the call) into the target register, then return. */
4739 xops[0] = gen_rtx_REG (SImode, regno);
4740 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4741 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4742 output_asm_insn ("ret", xops);
4745 if (NEED_INDICATE_EXEC_STACK)
4746 file_end_indicate_exec_stack ();
4749 /* Emit code for the SET_GOT patterns: load the address of the GOT
   into DEST, either via an inline call/pop sequence or via a pc-thunk
   (preferred with deep branch prediction, since a matched call/ret
   pair keeps the return-address predictor balanced). */
4752 output_set_got (rtx dest)
4757 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4759 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
/* Inline variant: call to a local label, then pop the pushed return
   address into DEST. */
4761 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4764 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4766 output_asm_insn ("call\t%a2", xops);
4769 /* Output the "canonical" label name ("Lxx$pb") here too. This
4770 is what will be referred to by the Mach-O PIC subsystem. */
4771 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4773 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4774 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4777 output_asm_insn ("pop{l}\t%0", xops);
/* Thunk variant: call the per-register pc-thunk and record that the
   thunk for DEST's register must be emitted at end of file. */
4782 get_pc_thunk_name (name, REGNO (dest));
4783 pic_labels_used |= 1 << REGNO (dest);
4785 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4786 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4787 output_asm_insn ("call\t%X2", xops);
/* Finally adjust DEST by the GOT symbol's offset from the loaded pc. */
4790 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4791 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4792 else if (!TARGET_MACHO)
4793 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4798 /* Generate a "push" pattern for input ARG: a SET storing the value
   into memory at the pre-decremented stack pointer.
   NOTE(review): the function's declaration line and part of the SET
   arguments were dropped by the extraction. */
4803 return gen_rtx_SET (VOIDmode,
4805 gen_rtx_PRE_DEC (Pmode,
4806 stack_pointer_rtx)),
4810 /* Return >= 0 if there is an unused call-clobbered register available
4811 for the entire function. Only leaf, non-profiled functions qualify;
   candidate hard registers 2..0 are scanned for one that is never
   live. Returns INVALID_REGNUM when none is available. */
4814 ix86_select_alt_pic_regnum (void)
4816 if (current_function_is_leaf && !current_function_profile)
4819 for (i = 2; i >= 0; --i)
4820 if (!regs_ever_live[i])
4824 return INVALID_REGNUM;
4827 /* Return 1 if we need to save REGNO in the prologue.
   MAYBE_EH_RETURN additionally treats the EH return data registers
   as needing a save (they are restored only on the eh_return path). */
4829 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be preserved when it is actually used, unless
   an alternate unused call-clobbered register can carry the PIC base. */
4831 if (pic_offset_table_rtx
4832 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4833 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4834 || current_function_profile
4835 || current_function_calls_eh_return
4836 || current_function_uses_const_pool))
4838 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4843 if (current_function_calls_eh_return && maybe_eh_return)
4848 unsigned test = EH_RETURN_DATA_REGNO (i);
4849 if (test == INVALID_REGNUM)
/* Default rule: save any live, call-saved, non-fixed register, except
   the hard frame pointer when it is already saved by the frame setup. */
4856 return (regs_ever_live[regno]
4857 && !call_used_regs[regno]
4858 && !fixed_regs[regno]
4859 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4862 /* Return number of registers to be saved on the stack, counting
   every hard register for which ix86_save_reg is true. */
4865 ix86_nsaved_regs (void)
4870 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4871 if (ix86_save_reg (regno, true))
4876 /* Return the offset between two registers, one to be eliminated, and the other
4877 its replacement, at the start of a routine. Offsets are derived from
   the computed frame layout. */
4880 ix86_initial_elimination_offset (int from, int to)
4882 struct ix86_frame frame;
4883 ix86_compute_frame_layout (&frame);
4885 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4886 return frame.hard_frame_pointer_offset;
4887 else if (from == FRAME_POINTER_REGNUM
4888 && to == HARD_FRAME_POINTER_REGNUM)
4889 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Otherwise the only remaining target is the stack pointer. */
4892 if (to != STACK_POINTER_REGNUM)
4894 else if (from == ARG_POINTER_REGNUM)
4895 return frame.stack_pointer_offset;
4896 else if (from != FRAME_POINTER_REGNUM)
4899 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4903 /* Fill structure ix86_frame about frame of currently computed function.
   Computes register-save counts, paddings, the red zone, and the
   offsets of the frame/stack/hard-frame pointers. */
4906 ix86_compute_frame_layout (struct ix86_frame *frame)
4908 HOST_WIDE_INT total_size;
4909 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4910 HOST_WIDE_INT offset;
4911 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4912 HOST_WIDE_INT size = get_frame_size ();
4914 frame->nregs = ix86_nsaved_regs ();
4917 /* During reload iteration the amount of registers saved can change.
4918 Recompute the value as needed. Do not recompute when amount of registers
4919 didn't change as reload does multiple calls to the function and does not
4920 expect the decision to change within single iteration. */
4922 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4924 int count = frame->nregs;
4926 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4927 /* The fast prologue uses move instead of push to save registers. This
4928 is significantly longer, but also executes faster as modern hardware
4929 can execute the moves in parallel, but can't do that for push/pop.
4931 Be careful about choosing what prologue to emit: When function takes
4932 many instructions to execute we may use slow version as well as in
4933 case function is known to be outside hot spot (this is known with
4934 feedback only). Weight the size of function by number of registers
4935 to save as it is cheap to use one or two push instructions but very
4936 slow to use many of them. */
4938 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4939 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4940 || (flag_branch_probabilities
4941 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4942 cfun->machine->use_fast_prologue_epilogue = false;
4944 cfun->machine->use_fast_prologue_epilogue
4945 = !expensive_function_p (count);
4947 if (TARGET_PROLOGUE_USING_MOVE
4948 && cfun->machine->use_fast_prologue_epilogue)
4949 frame->save_regs_using_mov = true;
4951 frame->save_regs_using_mov = false;
4954 /* Skip return address and saved base pointer. */
4955 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4957 frame->hard_frame_pointer_offset = offset;
4959 /* Do some sanity checking of stack_alignment_needed and
4960 preferred_alignment, since the i386 port is the only one using those
4961 features and they may break easily. */
4963 if (size && !stack_alignment_needed)
4965 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4967 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4969 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4972 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4973 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4975 /* Register save area */
4976 offset += frame->nregs * UNITS_PER_WORD;
/* Varargs register-save area (x86-64 ABI register spill slots). */
4979 if (ix86_save_varrargs_registers)
4981 offset += X86_64_VARARGS_SIZE;
4982 frame->va_arg_size = X86_64_VARARGS_SIZE;
4985 frame->va_arg_size = 0;
4987 /* Align start of frame for local function. */
4988 frame->padding1 = ((offset + stack_alignment_needed - 1)
4989 & -stack_alignment_needed) - offset;
4991 offset += frame->padding1;
4993 /* Frame pointer points here. */
4994 frame->frame_pointer_offset = offset;
4998 /* Add outgoing arguments area. Can be skipped if we eliminated
4999 all the function calls as dead code. */
5000 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
5002 offset += current_function_outgoing_args_size;
5003 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5006 frame->outgoing_arguments_size = 0;
5008 /* Align stack boundary. Only needed if we're calling another function
5010 if (!current_function_is_leaf || current_function_calls_alloca)
5011 frame->padding2 = ((offset + preferred_alignment - 1)
5012 & -preferred_alignment) - offset;
5014 frame->padding2 = 0;
5016 offset += frame->padding2;
5018 /* We've reached end of stack frame. */
5019 frame->stack_pointer_offset = offset;
5021 /* Size prologue needs to allocate. */
5022 frame->to_allocate =
5023 (size + frame->padding1 + frame->padding2
5024 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Moves are not worthwhile for tiny frames, and 64-bit displacements
   larger than 31 bits cannot be encoded in a mov's address. */
5026 if ((!frame->to_allocate && frame->nregs <= 1)
5027 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5028 frame->save_regs_using_mov = false;
/* Red zone: a leaf function that never moves sp may use the area below
   the stack pointer, capped at RED_ZONE_SIZE - RED_ZONE_RESERVE. */
5030 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5031 && current_function_is_leaf)
5033 frame->red_zone_size = frame->to_allocate;
5034 if (frame->save_regs_using_mov)
5035 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5036 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5037 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5040 frame->red_zone_size = 0;
5041 frame->to_allocate -= frame->red_zone_size;
5042 frame->stack_pointer_offset -= frame->red_zone_size;
/* NOTE(review): these dumps are presumably wrapped in a debug-only
   conditional dropped by the extraction — confirm against full source. */
5044 fprintf (stderr, "nregs: %i\n", frame->nregs);
5045 fprintf (stderr, "size: %i\n", size);
5046 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5047 fprintf (stderr, "padding1: %i\n", frame->padding1);
5048 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5049 fprintf (stderr, "padding2: %i\n", frame->padding2);
5050 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5051 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5052 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5053 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5054 frame->hard_frame_pointer_offset);
5055 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5059 /* Emit code to save registers in the prologue using push insns,
   one frame-related push per register ix86_save_reg selects. */
5062 ix86_emit_save_regs (void)
5067 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5068 if (ix86_save_reg (regno, true))
5070 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5071 RTX_FRAME_RELATED_P (insn) = 1;
5075 /* Emit code to save registers using MOV insns. First register
5076 is stored at POINTER + OFFSET; subsequent ones at successive
   word offsets. */
5078 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5083 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5084 if (ix86_save_reg (regno, true))
5086 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5088 gen_rtx_REG (Pmode, regno));
5089 RTX_FRAME_RELATED_P (insn) = 1;
5090 offset += UNITS_PER_WORD;
5094 /* Expand prologue or epilogue stack adjustment.
5095 The pattern exists to put a dependency on all ebp-based memory accesses.
5096 STYLE should be negative if instructions should be marked as frame related,
5097 zero if %r11 register is live and cannot be freely used and positive
   otherwise. */
5101 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5106 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5107 else if (x86_64_immediate_operand (offset, DImode))
5108 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
/* Offset too wide for an immediate: materialize it in %r11 first. */
5112 /* r11 is used by indirect sibcall return as well, set before the
5113 epilogue and used after the epilogue. ATM indirect sibcall
5114 shouldn't be used together with huge frame sizes in one
5115 function because of the frame_size check in sibcall.c. */
5118 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5119 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5121 RTX_FRAME_RELATED_P (insn) = 1;
5122 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5126 RTX_FRAME_RELATED_P (insn) = 1;
5129 /* Expand the prologue into a bunch of separate insns: frame pointer
   setup, register saves (push or mov), stack allocation (with probing
   on targets that require it), and PIC register initialization. */
5132 ix86_expand_prologue (void)
5136 struct ix86_frame frame;
5137 HOST_WIDE_INT allocate;
5139 ix86_compute_frame_layout (&frame);
5141 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5142 slower on all targets. Also sdb doesn't like it. */
5144 if (frame_pointer_needed)
5146 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5147 RTX_FRAME_RELATED_P (insn) = 1;
5149 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5150 RTX_FRAME_RELATED_P (insn) = 1;
5153 allocate = frame.to_allocate;
5155 if (!frame.save_regs_using_mov)
5156 ix86_emit_save_regs ();
5158 allocate += frame.nregs * UNITS_PER_WORD;
5160 /* When using red zone we may start register saving before allocating
5161 the stack frame saving one cycle of the prologue. */
5162 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5163 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5164 : stack_pointer_rtx,
5165 -frame.nregs * UNITS_PER_WORD);
5169 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5170 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5171 GEN_INT (-allocate), -1);
/* Large allocation with stack probing: go through the allocator helper
   with the size in %eax, preserving a live %eax around the call. */
5174 /* Only valid for Win32. */
5175 rtx eax = gen_rtx_REG (SImode, 0);
5176 bool eax_live = ix86_eax_live_at_start_p ();
5183 emit_insn (gen_push (eax));
5187 insn = emit_move_insn (eax, GEN_INT (allocate));
5188 RTX_FRAME_RELATED_P (insn) = 1;
5190 insn = emit_insn (gen_allocate_stack_worker (eax));
5191 RTX_FRAME_RELATED_P (insn) = 1;
5195 rtx t = plus_constant (stack_pointer_rtx, allocate);
5196 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5200 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5202 if (!frame_pointer_needed || !frame.to_allocate)
5203 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5205 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5206 -frame.nregs * UNITS_PER_WORD);
5209 pic_reg_used = false;
5210 if (pic_offset_table_rtx
5211 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5212 || current_function_profile))
5214 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5216 if (alt_pic_reg_used != INVALID_REGNUM)
5217 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5219 pic_reg_used = true;
5224 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5226 /* Even with accurate pre-reload life analysis, we can wind up
5227 deleting all references to the pic register after reload.
5228 Consider if cross-jumping unifies two sides of a branch
5229 controlled by a comparison vs the only read from a global.
5230 In which case, allow the set_got to be deleted, though we're
5231 too late to do anything about the ebx save in the prologue. */
5232 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL)
5235 /* Prevent function calls from being scheduled before the call to mcount.
5236 In the pic_reg_used case, make sure that the got load isn't deleted. */
5237 if (current_function_profile)
5238 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5241 /* Emit code to restore saved registers using MOV insns. First register
5242 is restored from POINTER + OFFSET; subsequent ones from successive
   word offsets. MAYBE_EH_RETURN is forwarded to ix86_save_reg so the
   EH data registers can be included or excluded. */
5244 ix86_emit_restore_regs_using_mov (rtx pointer, int offset, int maybe_eh_return)
5248 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5249 if (ix86_save_reg (regno, maybe_eh_return))
5251 emit_move_insn (gen_rtx_REG (Pmode, regno),
5252 adjust_address (gen_rtx_MEM (Pmode, pointer),
5254 offset += UNITS_PER_WORD;
5258 /* Restore function stack, frame, and registers. STYLE selects the
   caller context (e.g. 2 for eh_return epilogues; sibcall epilogues
   omit the return instruction). */
5261 ix86_expand_epilogue (int style)
5264 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5265 struct ix86_frame frame;
5266 HOST_WIDE_INT offset;
5268 ix86_compute_frame_layout (&frame);
5270 /* Calculate start of saved registers relative to ebp. Special care
5271 must be taken for the normal return case of a function using
5272 eh_return: the eax and edx registers are marked as saved, but not
5273 restored along this path. */
5274 offset = frame.nregs;
5275 if (current_function_calls_eh_return && style != 2)
5277 offset *= -UNITS_PER_WORD;
5279 /* If we're only restoring one register and sp is not valid then
5280 use a move instruction to restore the register, since it's
5281 less work than reloading sp and popping the register.
5283 The default code results in stack adjustment using add/lea instruction,
5284 while this code results in LEAVE instruction (or discrete equivalent),
5285 so it is profitable in some other cases as well. Especially when there
5286 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5287 and there is exactly one register to pop. This heuristic may need some
5288 tuning in future. */
5289 if ((!sp_valid && frame.nregs <= 1)
5290 || (TARGET_EPILOGUE_USING_MOVE
5291 && cfun->machine->use_fast_prologue_epilogue
5292 && (frame.nregs > 1 || frame.to_allocate))
5293 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5294 || (frame_pointer_needed && TARGET_USE_LEAVE
5295 && cfun->machine->use_fast_prologue_epilogue
5296 && frame.nregs == 1)
5297 || current_function_calls_eh_return)
5299 /* Restore registers. We can use ebp or esp to address the memory
5300 locations. If both are available, default to ebp, since offsets
5301 are known to be small. Only exception is esp pointing directly to the
5302 end of block of saved registers, where we may simplify addressing
5305 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5306 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5307 frame.to_allocate, style == 2);
5309 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5310 offset, style == 2);
5312 /* eh_return epilogues need %ecx added to the stack pointer. */
5315 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5317 if (frame_pointer_needed)
5319 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5320 tmp = plus_constant (tmp, UNITS_PER_WORD);
5321 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5323 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5324 emit_move_insn (hard_frame_pointer_rtx, tmp);
5326 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5331 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5332 tmp = plus_constant (tmp, (frame.to_allocate
5333 + frame.nregs * UNITS_PER_WORD));
5334 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5337 else if (!frame_pointer_needed)
5338 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5339 GEN_INT (frame.to_allocate
5340 + frame.nregs * UNITS_PER_WORD),
5342 /* If not an i386, mov & pop is faster than "leave". */
5343 else if (TARGET_USE_LEAVE || optimize_size
5344 || !cfun->machine->use_fast_prologue_epilogue)
5345 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5348 pro_epilogue_adjust_stack (stack_pointer_rtx,
5349 hard_frame_pointer_rtx,
5352 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5354 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Pop-based path: deallocate the frame, then pop the saved registers. */
5359 /* First step is to deallocate the stack frame so that we can
5360 pop the registers. */
5363 if (!frame_pointer_needed)
5365 pro_epilogue_adjust_stack (stack_pointer_rtx,
5366 hard_frame_pointer_rtx,
5367 GEN_INT (offset), style);
5369 else if (frame.to_allocate)
5370 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5371 GEN_INT (frame.to_allocate), style);
5373 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5374 if (ix86_save_reg (regno, false))
5377 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5379 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5381 if (frame_pointer_needed)
5383 /* Leave results in shorter dependency chains on CPUs that are
5384 able to grok it fast. */
5385 if (TARGET_USE_LEAVE)
5386 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5387 else if (TARGET_64BIT)
5388 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5390 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5394 /* Sibcall epilogues don't want a return instruction. */
5398 if (current_function_pops_args && current_function_args_size)
5400 rtx popc = GEN_INT (current_function_pops_args);
5402 /* i386 can only pop 64K bytes. If asked to pop more, pop
5403 return address, do explicit add, and jump indirectly to the
5406 if (current_function_pops_args >= 65536)
5408 rtx ecx = gen_rtx_REG (SImode, 2);
5410 /* There is no "pascal" calling convention in 64bit ABI. */
5414 emit_insn (gen_popsi1 (ecx));
5415 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5416 emit_jump_insn (gen_return_indirect_internal (ecx));
5419 emit_jump_insn (gen_return_pop_internal (popc));
5422 emit_jump_insn (gen_return_internal ());
5425 /* Reset from the function's potential modifications: restore the PIC
   register's canonical hard-register number, which the prologue may
   have retargeted to an alternate register. */
5428 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5429 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5431 if (pic_offset_table_rtx)
5432 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5435 /* Extract the parts of an RTL expression that is a valid memory address
5436 for an instruction. Return 0 if the structure of the address is
5437 grossly off. Return -1 if the address contains ASHIFT, so it is not
5438 strictly valid, but still used for computing length of lea instruction.
   On success, OUT receives base, index, scale, displacement, and
   segment. */
5441 ix86_decompose_address (register rtx addr, struct ix86_address *out)
5443 rtx base = NULL_RTX;
5444 rtx index = NULL_RTX;
5445 rtx disp = NULL_RTX;
5446 HOST_WIDE_INT scale = 1;
5447 rtx scale_rtx = NULL_RTX;
5449 enum ix86_address_seg seg = SEG_DEFAULT;
5451 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5453 else if (GET_CODE (addr) == PLUS)
/* Flatten a (possibly nested) PLUS chain into an addend list, then
   classify each addend as base, index*scale, displacement, or segment. */
5463 addends[n++] = XEXP (op, 1);
5466 while (GET_CODE (op) == PLUS);
5471 for (i = n; i >= 0; --i)
5474 switch (GET_CODE (op))
5479 index = XEXP (op, 0);
5480 scale_rtx = XEXP (op, 1);
/* A thread-pointer UNSPEC selects the TLS segment register. */
5484 if (XINT (op, 1) == UNSPEC_TP
5485 && TARGET_TLS_DIRECT_SEG_REFS
5486 && seg == SEG_DEFAULT)
5487 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5516 else if (GET_CODE (addr) == MULT)
5518 index = XEXP (addr, 0); /* index*scale */
5519 scale_rtx = XEXP (addr, 1);
5521 else if (GET_CODE (addr) == ASHIFT)
5525 /* We're called for lea too, which implements ashift on occasion. */
5526 index = XEXP (addr, 0);
5527 tmp = XEXP (addr, 1);
5528 if (GET_CODE (tmp) != CONST_INT)
5530 scale = INTVAL (tmp);
5531 if ((unsigned HOST_WIDE_INT) scale > 3)
5537 disp = addr; /* displacement */
5539 /* Extract the integral value of scale. */
5542 if (GET_CODE (scale_rtx) != CONST_INT)
5544 scale = INTVAL (scale_rtx);
5547 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5548 if (base && index && scale == 1
5549 && (index == arg_pointer_rtx
5550 || index == frame_pointer_rtx
5551 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5558 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5559 if ((base == hard_frame_pointer_rtx
5560 || base == frame_pointer_rtx
5561 || base == arg_pointer_rtx) && !disp)
5564 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5565 Avoid this by transforming to [%esi+0]. */
5566 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5567 && base && !index && !disp
5569 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5572 /* Special case: encode reg+reg instead of reg*2. */
5573 if (!base && index && scale && scale == 2)
5574 base = index, scale = 1;
5576 /* Special case: scaling cannot be encoded without base or displacement. */
5577 if (!base && !disp && index && scale != 1)
5589 /* Return cost of the memory address x.
5590 For i386, it is better to use a complex address than let gcc copy
5591 the address into a reg and make a new pseudo. But not if the address
5592 requires two regs - that would mean more pseudos with longer
   lifetimes. */
5595 ix86_address_cost (rtx x)
5597 struct ix86_address parts;
5600 if (!ix86_decompose_address (x, &parts))
5603 if (parts.base && GET_CODE (parts.base) == SUBREG)
5604 parts.base = SUBREG_REG (parts.base);
5605 if (parts.index && GET_CODE (parts.index) == SUBREG)
5606 parts.index = SUBREG_REG (parts.index);
5608 /* More complex memory references are better. */
5609 if (parts.disp && parts.disp != const0_rtx)
5611 if (parts.seg != SEG_DEFAULT)
5614 /* Attempt to minimize number of registers in the address. */
5616 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5618 && (!REG_P (parts.index)
5619 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5623 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5625 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5626 && parts.base != parts.index)
5629 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5630 since its predecode logic can't detect the length of instructions
5631 and it degenerates to vector decoded. Increase cost of such
5632 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5633 to split such addresses or even refuse such addresses at all.
5635 Following addressing modes are affected:
5640 The first and last case may be avoidable by explicitly coding the zero in
5641 memory address, but I don't have AMD-K6 machine handy to check this
   theory. */
5645 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5646 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5647 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5653 /* If X is a machine specific address (i.e. a symbol or label being
5654 referenced as a displacement from the GOT implemented using an
5655 UNSPEC), then return the base term. Otherwise return X. */
5658 ix86_find_base_term (rtx x)
/* 64-bit path: unwrap CONST (PLUS symbol const-offset) around a
   GOTPCREL UNSPEC and return the underlying symbol/label. */
5664 if (GET_CODE (x) != CONST)
5667 if (GET_CODE (term) == PLUS
5668 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5669 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5670 term = XEXP (term, 0);
5671 if (GET_CODE (term) != UNSPEC
5672 || XINT (term, 1) != UNSPEC_GOTPCREL)
5675 term = XVECEXP (term, 0, 0);
5677 if (GET_CODE (term) != SYMBOL_REF
5678 && GET_CODE (term) != LABEL_REF)
/* Otherwise delegitimize and keep the result only if it is a plain
   symbol or label. */
5684 term = ix86_delegitimize_address (x);
5686 if (GET_CODE (term) != SYMBOL_REF
5687 && GET_CODE (term) != LABEL_REF)
5693 /* Determine if a given RTX is a valid constant. We already know this
5694 satisfies CONSTANT_P. */
5697 legitimate_constant_p (rtx x)
5701 switch (GET_CODE (x))
5704 /* TLS symbols are not constant. */
5705 if (tls_symbolic_operand (x, Pmode))
5710 inner = XEXP (x, 0);
5712 /* Offsets of TLS symbols are never valid.
5713 Discourage CSE from creating them. */
5714 if (GET_CODE (inner) == PLUS
5715 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
/* Strip a constant-integer offset before inspecting the inner term. */
5718 if (GET_CODE (inner) == PLUS)
5720 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5722 inner = XEXP (inner, 0);
5725 /* Only some unspecs are valid as "constants". */
5726 if (GET_CODE (inner) == UNSPEC)
5727 switch (XINT (inner, 1))
5731 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5733 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5743 /* Otherwise we handle everything else in the move patterns. */
5747 /* Determine if it's legal to put X into the constant pool. This
5748 is not possible for the address of thread-local symbols, which
5749 is checked above. Simply the negation of legitimate_constant_p. */
5752 ix86_cannot_force_const_mem (rtx x)
5754 return !legitimate_constant_p (x);
5757 /* Determine if a given RTX is a valid constant address: any constant
   that passes strict address legitimacy checking. */
5760 constant_address_p (rtx x)
5762 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5765 /* Nonzero if the constant value X is a legitimate general operand
5766 when generating PIC code. It is given that flag_pic is on and
5767 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5770 legitimate_pic_operand_p (rtx x)
5774 switch (GET_CODE (x))
5777 inner = XEXP (x, 0);
5779 /* Only some unspecs are valid as "constants". */
5780 if (GET_CODE (inner) == UNSPEC)
5781 switch (XINT (inner, 1))
5784 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
/* Symbols and labels defer to the PIC displacement check. */
5792 return legitimate_pic_address_disp_p (x);
5799 /* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode. */
5803 legitimate_pic_address_disp_p (register rtx disp)
5807 /* In 64bit mode we can allow direct addresses of symbols and labels
5808 when they are not dynamic symbols. */
5811 /* TLS references should always be enclosed in UNSPEC. */
5812 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5814 if (GET_CODE (disp) == SYMBOL_REF
5815 && ix86_cmodel == CM_SMALL_PIC
5816 && SYMBOL_REF_LOCAL_P (disp))
5818 if (GET_CODE (disp) == LABEL_REF)
5820 if (GET_CODE (disp) == CONST
5821 && GET_CODE (XEXP (disp, 0)) == PLUS)
5823 rtx op0 = XEXP (XEXP (disp, 0), 0);
5824 rtx op1 = XEXP (XEXP (disp, 0), 1);
5826 /* TLS references should always be enclosed in UNSPEC. */
5827 if (tls_symbolic_operand (op0, GET_MODE (op0)))
/* symbol+offset is fine for local symbols if the offset stays within
   +-16MB of the symbol. */
5829 if (((GET_CODE (op0) == SYMBOL_REF
5830 && ix86_cmodel == CM_SMALL_PIC
5831 && SYMBOL_REF_LOCAL_P (op0))
5832 || GET_CODE (op0) == LABEL_REF)
5833 && GET_CODE (op1) == CONST_INT
5834 && INTVAL (op1) < 16*1024*1024
5835 && INTVAL (op1) >= -16*1024*1024)
5839 if (GET_CODE (disp) != CONST)
5841 disp = XEXP (disp, 0);
/* 64-bit: only GOTPCREL UNSPECs of symbols/labels remain legitimate. */
5845 /* It is unsafe to allow PLUS expressions; this would let references
5846 reach beyond the GOT. We should not need these anyway. */
5847 if (GET_CODE (disp) != UNSPEC
5848 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5851 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5852 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit: strip a constant offset, then validate the remaining term. */
5858 if (GET_CODE (disp) == PLUS)
5860 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5862 disp = XEXP (disp, 0);
5866 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5867 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5869 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5870 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5871 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5873 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5874 if (! strcmp (sym_name, "<pic base>"))
5879 if (GET_CODE (disp) != UNSPEC)
5882 switch (XINT (disp, 1))
5887 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5889 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5890 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5891 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5893 case UNSPEC_GOTTPOFF:
5894 case UNSPEC_GOTNTPOFF:
5895 case UNSPEC_INDNTPOFF:
5898 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5900 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5902 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5908 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5909 memory address for an instruction. The MODE argument is the machine mode
5910 for the MEM expression that wants to use this address.
5912 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5913 convert common non-canonical forms to canonical form so that they will
/* Return nonzero iff ADDR is a legitimate address for MODE.  STRICT
   selects the strict (REG_OK_FOR_*_STRICT_P) versus non-strict register
   checks.  On rejection, REASON / REASON_RTX describe the offending
   construct for the TARGET_DEBUG_ADDR dump at the end.  */
5917 legitimate_address_p (enum machine_mode mode, register rtx addr, int strict)
5919 struct ix86_address parts;
5920 rtx base, index, disp;
5921 HOST_WIDE_INT scale;
5922 const char *reason = NULL;
5923 rtx reason_rtx = NULL_RTX;
5925 if (TARGET_DEBUG_ADDR)
5928 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5929 GET_MODE_NAME (mode), strict);
/* Split ADDR into base + index*scale + disp; anything that fails to
   decompose is rejected outright.  */
5933 if (ix86_decompose_address (addr, &parts) <= 0)
5935 reason = "decomposition failed";
5940 index = parts.index;
5942 scale = parts.scale;
5944 /* Validate base register.
5946 Don't allow SUBREG's here, it can lead to spill failures when the base
5947 is one word out of a two word structure, which is represented internally
5955 if (GET_CODE (base) == SUBREG)
5956 reg = SUBREG_REG (base);
5960 if (GET_CODE (reg) != REG)
5962 reason = "base is not a register";
5966 if (GET_MODE (base) != Pmode)
5968 reason = "base is not in Pmode";
5972 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5973 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5975 reason = "base is not valid";
5980 /* Validate index register.
5982 Don't allow SUBREG's here, it can lead to spill failures when the index
5983 is one word out of a two word structure, which is represented internally
5991 if (GET_CODE (index) == SUBREG)
5992 reg = SUBREG_REG (index);
5996 if (GET_CODE (reg) != REG)
5998 reason = "index is not a register";
6002 if (GET_MODE (index) != Pmode)
6004 reason = "index is not in Pmode";
6008 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6009 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6011 reason = "index is not valid";
6016 /* Validate scale factor. */
6019 reason_rtx = GEN_INT (scale);
6022 reason = "scale without index";
6026 if (scale != 2 && scale != 4 && scale != 8)
6028 reason = "scale is not a valid multiplier";
6033 /* Validate displacement. */
/* Our own TLS/GOT unspecs are the only unspecs acceptable as a
   displacement; anything else is an error.  */
6038 if (GET_CODE (disp) == CONST
6039 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6040 switch (XINT (XEXP (disp, 0), 1))
6044 case UNSPEC_GOTPCREL:
6047 goto is_legitimate_pic;
6049 case UNSPEC_GOTTPOFF:
6050 case UNSPEC_GOTNTPOFF:
6051 case UNSPEC_INDNTPOFF:
6057 reason = "invalid address unspec";
6061 else if (flag_pic && (SYMBOLIC_CONST (disp)
6063 && !machopic_operand_p (disp)
6068 if (TARGET_64BIT && (index || base))
6070 /* foo@dtpoff(%rX) is ok. */
6071 if (GET_CODE (disp) != CONST
6072 || GET_CODE (XEXP (disp, 0)) != PLUS
6073 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6074 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6075 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6076 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6078 reason = "non-constant pic memory reference";
6082 else if (! legitimate_pic_address_disp_p (disp))
6084 reason = "displacement is an invalid pic construct";
6088 /* This code used to verify that a symbolic pic displacement
6089 includes the pic_offset_table_rtx register.
6091 While this is good idea, unfortunately these constructs may
6092 be created by "adds using lea" optimization for incorrect
6101 This code is nonsensical, but results in addressing
6102 GOT table with pic_offset_table_rtx base. We can't
6103 just refuse it easily, since it gets matched by
6104 "addsi3" pattern, that later gets split to lea in the
6105 case output register differs from input. While this
6106 can be handled by separate addsi pattern for this case
6107 that never results in lea, this seems to be easier and
6108 correct fix for crash to disable this test. */
6110 else if (GET_CODE (disp) != LABEL_REF
6111 && GET_CODE (disp) != CONST_INT
6112 && (GET_CODE (disp) != CONST
6113 || !legitimate_constant_p (disp))
6114 && (GET_CODE (disp) != SYMBOL_REF
6115 || !legitimate_constant_p (disp)))
6117 reason = "displacement is not constant";
/* On x86-64 the displacement must be representable as a sign-extended
   value (see x86_64_sign_extended_value).  */
6120 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6122 reason = "displacement is out of range";
6127 /* Everything looks valid. */
6128 if (TARGET_DEBUG_ADDR)
6129 fprintf (stderr, "Success.\n");
6133 if (TARGET_DEBUG_ADDR)
6135 fprintf (stderr, "Error: %s\n", reason);
6136 debug_rtx (reason_rtx);
6141 /* Return an unique alias set for the GOT. */
6143 static HOST_WIDE_INT
6144 ix86_GOT_alias_set (void)
/* -1 marks the alias set as not yet allocated; presumably allocated
   only once on first call — confirm against the elided guard.  */
6146 static HOST_WIDE_INT set = -1;
6148 set = new_alias_set ();
6152 /* Return a legitimate reference for ORIG (an address) using the
6153 register REG. If REG is 0, a new pseudo is generated.
6155 There are two types of references that must be handled:
6157 1. Global data references must load the address from the GOT, via
6158 the PIC reg. An insn is emitted to do this load, and the reg is
6161 2. Static data references, constant pool addresses, and code labels
6162 compute the address as an offset from the GOT, whose base is in
6163 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6164 differentiate them from global data objects. The returned
6165 address is the PIC reg + an unspec constant.
6167 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6168 reg also appears in the address. */
6171 legitimize_pic_address (rtx orig, rtx reg)
6179 reg = gen_reg_rtx (Pmode);
6180 /* Use the generic Mach-O PIC machinery. */
6181 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6184 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6186 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6188 /* This symbol may be referenced via a displacement from the PIC
6189 base address (@GOTOFF). */
/* Keep the PIC register live while reload runs; the generated
   address depends on pic_offset_table_rtx.  */
6191 if (reload_in_progress)
6192 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6193 if (GET_CODE (addr) == CONST)
6194 addr = XEXP (addr, 0);
6195 if (GET_CODE (addr) == PLUS)
6197 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6198 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6201 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6202 new = gen_rtx_CONST (Pmode, new);
6203 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6207 emit_move_insn (reg, new);
6211 else if (GET_CODE (addr) == SYMBOL_REF)
/* 64-bit: load the symbol's address through a RIP-relative GOT slot
   (@GOTPCREL); the GOT entry itself never changes, hence
   RTX_UNCHANGING_P and the dedicated GOT alias set.  */
6215 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6216 new = gen_rtx_CONST (Pmode, new);
6217 new = gen_rtx_MEM (Pmode, new);
6218 RTX_UNCHANGING_P (new) = 1;
6219 set_mem_alias_set (new, ix86_GOT_alias_set ());
6222 reg = gen_reg_rtx (Pmode);
6223 /* Use directly gen_movsi, otherwise the address is loaded
6224 into register for CSE. We don't want to CSE this addresses,
6225 instead we CSE addresses from the GOT table, so skip this. */
6226 emit_insn (gen_movsi (reg, new))
6231 /* This symbol must be referenced via a load from the
6232 Global Offset Table (@GOT). */
6234 if (reload_in_progress)
6235 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6236 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6237 new = gen_rtx_CONST (Pmode, new);
6238 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6239 new = gen_rtx_MEM (Pmode, new);
6240 RTX_UNCHANGING_P (new) = 1;
6241 set_mem_alias_set (new, ix86_GOT_alias_set ());
6244 reg = gen_reg_rtx (Pmode);
6245 emit_move_insn (reg, new);
6251 if (GET_CODE (addr) == CONST)
6253 addr = XEXP (addr, 0);
6255 /* We must match stuff we generate before. Assume the only
6256 unspecs that can get here are ours. Not that we could do
6257 anything with them anyway... */
6258 if (GET_CODE (addr) == UNSPEC
6259 || (GET_CODE (addr) == PLUS
6260 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6262 if (GET_CODE (addr) != PLUS)
6265 if (GET_CODE (addr) == PLUS)
6267 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6269 /* Check first to see if this is a constant offset from a @GOTOFF
6270 symbol reference. */
6271 if (local_symbolic_operand (op0, Pmode)
6272 && GET_CODE (op1) == CONST_INT)
6276 if (reload_in_progress)
6277 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6278 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6280 new = gen_rtx_PLUS (Pmode, new, op1);
6281 new = gen_rtx_CONST (Pmode, new);
6282 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6286 emit_move_insn (reg, new);
/* Constant offsets beyond +/-16MB are forced into a register;
   presumably so the combined expression stays a legitimate
   address — confirm against elided context.  */
6292 if (INTVAL (op1) < -16*1024*1024
6293 || INTVAL (op1) >= 16*1024*1024)
6294 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
/* General PLUS: legitimize each half recursively, reusing REG for
   at most one side, then re-associate constants outward.  */
6299 base = legitimize_pic_address (XEXP (addr, 0), reg);
6300 new = legitimize_pic_address (XEXP (addr, 1),
6301 base == reg ? NULL_RTX : reg);
6303 if (GET_CODE (new) == CONST_INT)
6304 new = plus_constant (base, INTVAL (new));
6307 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6309 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6310 new = XEXP (new, 1);
6312 new = gen_rtx_PLUS (Pmode, base, new);
6320 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6323 get_thread_pointer (int to_reg)
/* The thread pointer is modeled as (unspec [const0_rtx] UNSPEC_TP).  */
6327 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
/* TO_REG path: copy the unspec into a fresh pseudo via an explicit SET.  */
6331 reg = gen_reg_rtx (Pmode);
6332 insn = gen_rtx_SET (VOIDmode, reg, tp);
6333 insn = emit_insn (insn);
6338 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6339 false if we expect this to be used for a memory address and true if
6340 we expect to load the address into a register. */
/* Returns a legitimate address (or register) for TLS symbol X under
   access MODEL; emits whatever insns the model requires.  */
6343 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6345 rtx dest, base, off, pic;
/* Global-dynamic: on 64-bit, a call pattern returning the address in
   %rax, wrapped in a libcall block; 32-bit uses its own pattern.  */
6350 case TLS_MODEL_GLOBAL_DYNAMIC:
6351 dest = gen_reg_rtx (Pmode);
6354 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6357 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6358 insns = get_insns ();
6361 emit_libcall_block (insns, dest, rax, x);
6364 emit_insn (gen_tls_global_dynamic_32 (dest, x));
/* Local-dynamic: compute the module base once, then address X as
   base + X@DTPOFF.  */
6367 case TLS_MODEL_LOCAL_DYNAMIC:
6368 base = gen_reg_rtx (Pmode);
6371 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6374 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6375 insns = get_insns ();
6378 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6379 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6380 emit_libcall_block (insns, base, rax, note);
6383 emit_insn (gen_tls_local_dynamic_base_32 (base));
6385 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6386 off = gen_rtx_CONST (Pmode, off);
6388 return gen_rtx_PLUS (Pmode, base, off);
/* Initial-exec: load the TP offset from the GOT; the unspec chosen
   depends on 64-bit/GNU-TLS/PIC configuration.  */
6390 case TLS_MODEL_INITIAL_EXEC:
6394 type = UNSPEC_GOTNTPOFF;
6398 if (reload_in_progress)
6399 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6400 pic = pic_offset_table_rtx;
6401 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6403 else if (!TARGET_GNU_TLS)
6405 pic = gen_reg_rtx (Pmode);
6406 emit_insn (gen_set_got (pic));
6407 type = UNSPEC_GOTTPOFF;
6412 type = UNSPEC_INDNTPOFF;
6415 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6416 off = gen_rtx_CONST (Pmode, off);
6418 off = gen_rtx_PLUS (Pmode, pic, off);
6419 off = gen_rtx_MEM (Pmode, off);
/* The GOT slot is constant at runtime: mark it unchanging and give it
   the GOT alias set.  */
6420 RTX_UNCHANGING_P (off) = 1;
6421 set_mem_alias_set (off, ix86_GOT_alias_set ());
6423 if (TARGET_64BIT || TARGET_GNU_TLS)
6425 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6426 off = force_reg (Pmode, off);
6427 return gen_rtx_PLUS (Pmode, base, off);
/* Non-GNU (Sun) TLS: address is thread pointer minus offset.  */
6431 base = get_thread_pointer (true);
6432 dest = gen_reg_rtx (Pmode);
6433 emit_insn (gen_subsi3 (dest, base, off));
/* Local-exec: offset is a link-time constant relative to the thread
   pointer (@NTPOFF/@TPOFF).  */
6437 case TLS_MODEL_LOCAL_EXEC:
6438 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6439 (TARGET_64BIT || TARGET_GNU_TLS)
6440 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6441 off = gen_rtx_CONST (Pmode, off);
6443 if (TARGET_64BIT || TARGET_GNU_TLS)
6445 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6446 return gen_rtx_PLUS (Pmode, base, off);
6450 base = get_thread_pointer (true);
6451 dest = gen_reg_rtx (Pmode);
6452 emit_insn (gen_subsi3 (dest, base, off));
6463 /* Try machine-dependent ways of modifying an illegitimate address
6464 to be legitimate. If we find one, return the new, valid address.
6465 This macro is used in only one place: `memory_address' in explow.c.
6467 OLDX is the address as it was before break_out_memory_refs was called.
6468 In some cases it is useful to look at this to decide what needs to be done.
6470 MODE and WIN are passed so that this macro can use
6471 GO_IF_LEGITIMATE_ADDRESS.
6473 It is always safe for this macro to do nothing. It exists to recognize
6474 opportunities to optimize the output.
6476 For the 80386, we handle X+REG by loading X into a register R and
6477 using R+REG. R will go in a general reg and indexing will be used.
6478 However, if REG is a broken-out memory address or multiplication,
6479 nothing needs to be done because REG can certainly go in a general reg.
6481 When -fpic is used, special handling is needed for symbolic references.
6482 See comments by legitimize_pic_address in i386.c for details. */
6485 legitimize_address (register rtx x, register rtx oldx ATTRIBUTE_UNUSED,
6486 enum machine_mode mode)
6491 if (TARGET_DEBUG_ADDR)
6493 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6494 GET_MODE_NAME (mode));
/* TLS symbols and PIC symbolic constants have dedicated
   legitimization paths; dispatch to them first.  */
6498 log = tls_symbolic_operand (x, mode);
6500 return legitimize_tls_address (x, log, false);
6502 if (flag_pic && SYMBOLIC_CONST (x))
6503 return legitimize_pic_address (x, 0);
6505 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6506 if (GET_CODE (x) == ASHIFT
6507 && GET_CODE (XEXP (x, 1)) == CONST_INT
6508 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6511 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6512 GEN_INT (1 << log))
6515 if (GET_CODE (x) == PLUS)
6517 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6519 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6520 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6521 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6524 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6525 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6526 GEN_INT (1 << log));
6529 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6530 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6531 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6534 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6535 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6536 GEN_INT (1 << log));
6539 /* Put multiply first if it isn't already. */
6540 if (GET_CODE (XEXP (x, 1)) == MULT)
6542 rtx tmp = XEXP (x, 0);
6543 XEXP (x, 0) = XEXP (x, 1);
6548 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6549 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6550 created by virtual register instantiation, register elimination, and
6551 similar optimizations. */
6552 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6555 x = gen_rtx_PLUS (Pmode,
6556 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6557 XEXP (XEXP (x, 1), 0)),
6558 XEXP (XEXP (x, 1), 1));
6562 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6563 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6564 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6565 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6566 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6567 && CONSTANT_P (XEXP (x, 1)))
/* Identify which operand is the loose CONST_INT and which is the
   remaining non-constant term before re-associating.  */
6570 rtx other = NULL_RTX;
6572 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6574 constant = XEXP (x, 1);
6575 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6577 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6579 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6580 other = XEXP (x, 1);
6588 x = gen_rtx_PLUS (Pmode,
6589 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6590 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6591 plus_constant (other, INTVAL (constant)));
/* After each canonicalization, stop as soon as the address is
   already legitimate (non-strict check).  */
6595 if (changed && legitimate_address_p (mode, x, FALSE))
6598 if (GET_CODE (XEXP (x, 0)) == MULT)
6601 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6604 if (GET_CODE (XEXP (x, 1)) == MULT)
6607 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6611 && GET_CODE (XEXP (x, 1)) == REG
6612 && GET_CODE (XEXP (x, 0)) == REG)
6615 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6618 x = legitimize_pic_address (x, 0);
6621 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one operand into a fresh register so the sum
   becomes REG + REG.  */
6624 if (GET_CODE (XEXP (x, 0)) == REG)
6626 register rtx temp = gen_reg_rtx (Pmode);
6627 register rtx val = force_operand (XEXP (x, 1), temp);
6629 emit_move_insn (temp, val);
6635 else if (GET_CODE (XEXP (x, 1)) == REG)
6637 register rtx temp = gen_reg_rtx (Pmode);
6638 register rtx val = force_operand (XEXP (x, 0), temp);
6640 emit_move_insn (temp, val);
6650 /* Print an integer constant expression in assembler syntax. Addition
6651 and subtraction are the only arithmetic that may appear in these
6652 expressions. FILE is the stdio stream to write to, X is the rtx, and
6653 CODE is the operand print code from the output string. */
6656 output_pic_addr_const (FILE *file, rtx x, int code)
6660 switch (GET_CODE (x))
/* Symbols: emit the name; non-local calls (code 'P') go through the PLT.  */
6670 assemble_name (file, XSTR (x, 0));
6671 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6672 fputs ("@PLT", file);
6679 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6680 assemble_name (asm_out_file, buf);
6684 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6688 /* This used to output parentheses around the expression,
6689 but that does not work on the 386 (either ATT or BSD assembler). */
6690 output_pic_addr_const (file, XEXP (x, 0), code);
6694 if (GET_MODE (x) == VOIDmode)
6696 /* We can use %d if the number is <32 bits and positive. */
6697 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6698 fprintf (file, "0x%lx%08lx",
6699 (unsigned long) CONST_DOUBLE_HIGH (x),
6700 (unsigned long) CONST_DOUBLE_LOW (x));
6702 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6705 /* We can't handle floating point constants;
6706 PRINT_OPERAND must handle them. */
6707 output_operand_lossage ("floating constant misused");
6711 /* Some assemblers need integer constants to appear first. */
6712 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6714 output_pic_addr_const (file, XEXP (x, 0), code);
6716 output_pic_addr_const (file, XEXP (x, 1), code);
6718 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6720 output_pic_addr_const (file, XEXP (x, 1), code);
6722 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: bracket style differs between Intel and AT&T dialects.  */
6730 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6731 output_pic_addr_const (file, XEXP (x, 0), code);
6733 output_pic_addr_const (file, XEXP (x, 1), code);
6735 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand followed by the relocation
   suffix corresponding to the unspec number.  */
6739 if (XVECLEN (x, 0) != 1)
6741 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6742 switch (XINT (x, 1))
6745 fputs ("@GOT", file);
6748 fputs ("@GOTOFF", file);
6750 case UNSPEC_GOTPCREL:
6751 fputs ("@GOTPCREL(%rip)", file);
6753 case UNSPEC_GOTTPOFF:
6754 /* FIXME: This might be @TPOFF in Sun ld too. */
6755 fputs ("@GOTTPOFF", file);
6758 fputs ("@TPOFF", file);
6762 fputs ("@TPOFF", file);
6764 fputs ("@NTPOFF", file);
6767 fputs ("@DTPOFF", file);
6769 case UNSPEC_GOTNTPOFF:
6771 fputs ("@GOTTPOFF(%rip)", file);
6773 fputs ("@GOTNTPOFF", file);
6775 case UNSPEC_INDNTPOFF:
6776 fputs ("@INDNTPOFF", file);
6779 output_operand_lossage ("invalid UNSPEC as operand");
6785 output_operand_lossage ("invalid expression as operand");
6789 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6790 We need to handle our special PIC relocations. */
6793 i386_dwarf_output_addr_const (FILE *file, rtx x)
/* Pointer-sized data directive: .quad on 64-bit, .long otherwise.  */
6796 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6800 fprintf (file, "%s", ASM_LONG);
/* PIC constants need our relocation-aware printer; otherwise the
   generic one suffices.  */
6803 output_pic_addr_const (file, x, '\0');
6805 output_addr_const (file, x);
6809 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6810 We need to emit DTP-relative relocations. */
6813 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6815 fputs (ASM_LONG, file);
6816 output_addr_const (file, x);
6817 fputs ("@DTPOFF", file);
/* NOTE(review): the trailing ", 0" presumably zero-pads the 4-byte
   @DTPOFF relocation when SIZE is 8 — confirm against the switch on
   SIZE that selects this path.  */
6823 fputs (", 0", file);
6830 /* In the name of slightly smaller debug output, and to cater to
6831 general assembler losage, recognize PIC+GOTOFF and turn it back
6832 into a direct symbol reference. */
/* Returns the underlying symbol (possibly plus the non-PIC part of the
   address) when ORIG_X matches one of our PIC constructs; otherwise the
   input is returned unchanged (return paths partly elided here).  */
6835 ix86_delegitimize_address (rtx orig_x)
6839 if (GET_CODE (x) == MEM)
/* 64-bit case: a load through sym@GOTPCREL delegitimizes to SYM.  */
6844 if (GET_CODE (x) != CONST
6845 || GET_CODE (XEXP (x, 0)) != UNSPEC
6846 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6847 || GET_CODE (orig_x) != MEM)
6849 return XVECEXP (XEXP (x, 0), 0, 0);
6852 if (GET_CODE (x) != PLUS
6853 || GET_CODE (XEXP (x, 1)) != CONST)
6856 if (GET_CODE (XEXP (x, 0)) == REG
6857 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6858 /* %ebx + GOT/GOTOFF */
6860 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6862 /* %ebx + %reg * scale + GOT/GOTOFF */
/* Y is the non-PIC half of the inner sum; it must be something an
   address can still carry (reg, mult or ashift).  */
6864 if (GET_CODE (XEXP (y, 0)) == REG
6865 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6867 else if (GET_CODE (XEXP (y, 1)) == REG
6868 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6872 if (GET_CODE (y) != REG
6873 && GET_CODE (y) != MULT
6874 && GET_CODE (y) != ASHIFT)
6880 x = XEXP (XEXP (x, 1), 0);
/* @GOT references only occur as loads (MEM); @GOTOFF only outside
   a MEM — the ORIG_X code distinguishes the two.  */
6881 if (GET_CODE (x) == UNSPEC
6882 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6883 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6886 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6887 return XVECEXP (x, 0, 0);
6890 if (GET_CODE (x) == PLUS
6891 && GET_CODE (XEXP (x, 0)) == UNSPEC
6892 && GET_CODE (XEXP (x, 1)) == CONST_INT
6893 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6894 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6895 && GET_CODE (orig_x) != MEM)))
6897 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6899 return gen_rtx_PLUS (Pmode, y, x);
/* Write the assembler condition suffix for comparison CODE under flags
   mode MODE to FILE.  REVERSE inverts the condition first; the elided
   FP parameter (signature continues past this view) apparently selects
   fcmov-style spellings ("nbe"/"nb") over the integer ones ("a"/"ae")
   — confirm against the full declaration.  */
6907 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
/* FP compares are first mapped onto an equivalent integer condition;
   compares needing a bypass or second jump cannot be expressed as a
   single suffix.  */
6912 if (mode == CCFPmode || mode == CCFPUmode)
6914 enum rtx_code second_code, bypass_code;
6915 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6916 if (bypass_code != NIL || second_code != NIL)
6918 code = ix86_fp_compare_code_to_integer (code);
6922 code = reverse_condition (code);
6933 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6938 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6939 Those same assemblers have the same but opposite losage on cmov. */
6942 suffix = fp ? "nbe" : "a";
6945 if (mode == CCNOmode || mode == CCGOCmode)
6947 else if (mode == CCmode || mode == CCGCmode)
6958 if (mode == CCNOmode || mode == CCGOCmode)
6960 else if (mode == CCmode || mode == CCGCmode)
6969 suffix = fp ? "nb" : "ae";
6972 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6982 suffix = fp ? "u" : "p";
6985 suffix = fp ? "nu" : "np";
6990 fputs (suffix, file);
/* Print the assembler name of register X to FILE.  CODE selects an
   explicit size variant ('b' byte, 'w' word, 'k' SImode, 'q' DImode,
   'y' st(0)-style, 'h' high byte); otherwise the size is derived from
   X's machine mode.  */
6994 print_reg (rtx x, int code, FILE *file)
6996 /* Code -1 indicates we are called from print_rtx, and it is not
6997 an error for a virtual register to appear here. */
7000 else if (REGNO (x) == ARG_POINTER_REGNUM
7001 || REGNO (x) == FRAME_POINTER_REGNUM
7002 || REGNO (x) == FLAGS_REG
7003 || REGNO (x) == FPSR_REG)
/* AT&T dialect (or an empty user label prefix) takes a '%' register
   prefix; the putc itself is in elided lines — confirm.  */
7006 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7009 if (code == 'w' || MMX_REG_P (x))
7011 else if (code == 'b')
7013 else if (code == 'k')
7015 else if (code == 'q')
7017 else if (code == 'y')
7019 else if (code == 'h')
/* No explicit size code: use the operand's mode size in bytes.  */
7022 code = GET_MODE_SIZE (GET_MODE (x));
7024 /* Irritatingly, AMD extended registers use different naming convention
7025 from the normal registers. */
7026 if (REX_INT_REG_P (x))
7033 error ("extended registers have no high halves");
7036 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7039 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7042 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7045 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7048 error ("unsupported operand size for extended register");
7056 if (STACK_TOP_P (x))
7058 fputs ("st(0)", file);
/* Integer registers get an 'e' (32-bit) or 'r' (64-bit full-width)
   name prefix; FP/SSE/MMX register names are used as-is.  */
7065 if (! ANY_FP_REG_P (x))
7066 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7070 fputs (hi_reg_name[REGNO (x)], file);
7073 fputs (qi_reg_name[REGNO (x)], file);
7076 fputs (qi_high_reg_name[REGNO (x)], file);
7083 /* Locate some local-dynamic symbol still in use by this function
7084 so that we can print its name in some tls_local_dynamic_base
7088 get_some_local_dynamic_name (void)
/* Cached per-function; computed at most once.  */
7092 if (cfun->machine->some_ld_name)
7093 return cfun->machine->some_ld_name;
/* Walk every insn pattern; the for_each_rtx callback stores the first
   local-dynamic SYMBOL_REF it finds into cfun->machine->some_ld_name.  */
7095 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7097 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7098 return cfun->machine->some_ld_name;
/* for_each_rtx callback for get_some_local_dynamic_name: record the
   first local-dynamic SYMBOL_REF encountered and stop the walk.  */
7104 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7108 if (GET_CODE (x) == SYMBOL_REF
7109 && local_dynamic_symbolic_operand (x, Pmode))
7111 cfun->machine->some_ld_name = XSTR (x, 0);
L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7120 C -- print opcode suffix for set/cmov insn.
7121 c -- like C, but print reversed condition
7122 F,f -- likewise, but for floating-point.
7123 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7125 R -- print the prefix for register names.
7126 z -- print the opcode suffix for the size of the current operand.
7127 * -- print a star (in certain assembler syntax)
7128 A -- print an absolute memory reference.
7129 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7130 s -- print a shift double count, followed by the assemblers argument
7132 b -- print the QImode name of the register for the indicated operand.
7133 %b0 would print %al if operands[0] is reg 0.
7134 w -- likewise, print the HImode name of the register.
7135 k -- likewise, print the SImode name of the register.
7136 q -- likewise, print the DImode name of the register.
7137 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7138 y -- print "st(0)" instead of "st" as a register.
7139 D -- print condition for SSE cmp instruction.
7140 P -- if PIC, print an @PLT suffix.
7141 X -- don't print any sort of PIC '@' suffix for a symbol.
7142 & -- print some in-use local-dynamic symbol name.
7146 print_operand (FILE *file, rtx x, int code)
/* '*' code: AT&T indirect-call star.  */
7153 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&' code: name of an in-use local-dynamic TLS symbol.  */
7158 assemble_name (file, get_some_local_dynamic_name ());
/* 'A' code: absolute memory reference.  */
7162 if (ASSEMBLER_DIALECT == ASM_ATT)
7164 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7166 /* Intel syntax. For absolute addresses, registers should not
7167 be surrounded by braces. */
7168 if (GET_CODE (x) != REG)
7171 PRINT_OPERAND (file, x, 0);
7179 PRINT_OPERAND (file, x, 0);
/* Explicit size-suffix codes; only AT&T dialect prints them.  */
7184 if (ASSEMBLER_DIALECT == ASM_ATT)
7189 if (ASSEMBLER_DIALECT == ASM_ATT)
7194 if (ASSEMBLER_DIALECT == ASM_ATT)
7199 if (ASSEMBLER_DIALECT == ASM_ATT)
7204 if (ASSEMBLER_DIALECT == ASM_ATT)
7209 if (ASSEMBLER_DIALECT == ASM_ATT)
7214 /* 387 opcodes don't get size suffixes if the operands are
7216 if (STACK_REG_P (x))
7219 /* Likewise if using Intel opcodes. */
7220 if (ASSEMBLER_DIALECT == ASM_INTEL)
7223 /* This is the size of op from size of operand. */
7224 switch (GET_MODE_SIZE (GET_MODE (x)))
7227 #ifdef HAVE_GAS_FILDS_FISTS
7233 if (GET_MODE (x) == SFmode)
7248 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7250 #ifdef GAS_MNEMONICS
/* 's' code: shift-double count plus the assembler separator.  */
7276 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7278 PRINT_OPERAND (file, x, 0);
7284 /* Little bit of braindamage here. The SSE compare instructions
7285 does use completely different names for the comparisons that the
7286 fp conditional moves. */
7287 switch (GET_CODE (x))
7302 fputs ("unord", file);
7306 fputs ("neq", file);
7310 fputs ("nlt", file);
7314 fputs ("nle", file);
7317 fputs ("ord", file);
/* 'O' code: Sun-as cmov size prefix, AT&T dialect only.  */
7325 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7326 if (ASSEMBLER_DIALECT == ASM_ATT)
7328 switch (GET_MODE (x))
7330 case HImode: putc ('w', file); break;
7332 case SFmode: putc ('l', file); break;
7334 case DFmode: putc ('q', file); break;
/* 'C' code: set/cmov condition suffix.  */
7342 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7345 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7346 if (ASSEMBLER_DIALECT == ASM_ATT)
7349 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7352 /* Like above, but reverse condition */
7354 /* Check to see if argument to %c is really a constant
7355 and not a condition code which needs to be reversed. */
7356 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7358 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7361 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7364 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7365 if (ASSEMBLER_DIALECT == ASM_ATT)
7368 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* Branch prediction hints: only when optimizing for speed and the
   static prediction disagrees with the CPU's default (forward not
   taken, backward taken).  */
7374 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7377 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7380 int pred_val = INTVAL (XEXP (x, 0));
/* Only emit a hint for a clearly-biased branch (outside 45%-55%).  */
7382 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7383 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7385 int taken = pred_val > REG_BR_PROB_BASE / 2;
7386 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7388 /* Emit hints only in the case default branch prediction
7389 heuristics would fail. */
7390 if (taken != cputaken)
7392 /* We use 3e (DS) prefix for taken branches and
7393 2e (CS) prefix for not taken branches. */
7395 fputs ("ds ; ", file);
7397 fputs ("cs ; ", file);
7404 output_operand_lossage ("invalid operand code `%c'", code);
/* No (or handled) code: print the operand itself by its rtx class.  */
7408 if (GET_CODE (x) == REG)
7410 PRINT_REG (x, code, file);
7413 else if (GET_CODE (x) == MEM)
7415 /* No `byte ptr' prefix for call instructions. */
7416 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7419 switch (GET_MODE_SIZE (GET_MODE (x)))
7421 case 1: size = "BYTE"; break;
7422 case 2: size = "WORD"; break;
7423 case 4: size = "DWORD"; break;
7424 case 8: size = "QWORD"; break;
7425 case 12: size = "XWORD"; break;
7426 case 16: size = "XMMWORD"; break;
7431 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7434 else if (code == 'w')
7436 else if (code == 'k')
7440 fputs (" PTR ", file);
7444 /* Avoid (%rip) for call operands. */
7445 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7446 && GET_CODE (x) != CONST_INT)
7447 output_addr_const (file, x);
7448 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7449 output_operand_lossage ("invalid constraints for operand");
7454 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
/* SFmode immediates are emitted as their 32-bit bit pattern.  */
7459 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7460 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7462 if (ASSEMBLER_DIALECT == ASM_ATT)
7464 fprintf (file, "0x%lx", l);
7467 /* These float cases don't actually occur as immediate operands. */
7468 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7472 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7473 fprintf (file, "%s", dstr);
7476 else if (GET_CODE (x) == CONST_DOUBLE
7477 && GET_MODE (x) == XFmode)
7481 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7482 fprintf (file, "%s", dstr);
7489 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7491 if (ASSEMBLER_DIALECT == ASM_ATT)
7494 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7495 || GET_CODE (x) == LABEL_REF)
7497 if (ASSEMBLER_DIALECT == ASM_ATT)
7500 fputs ("OFFSET FLAT:", file);
7503 if (GET_CODE (x) == CONST_INT)
7504 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7506 output_pic_addr_const (file, x, code);
7508 output_addr_const (file, x);
7512 /* Print a memory operand whose address is ADDR. */
/* Decomposes ADDR into base/index/scale/disp and prints it in the
   current assembler dialect's syntax.  */
7515 print_operand_address (FILE *file, register rtx addr)
7517 struct ix86_address parts;
7518 rtx base, index, disp;
7521 if (! ix86_decompose_address (addr, &parts))
7525 index = parts.index;
7527 scale = parts.scale;
/* Non-default segments print an fs:/gs: override first.  */
7535 if (USER_LABEL_PREFIX[0] == 0)
7537 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7543 if (!base && !index)
7545 /* Displacement only requires special attention. */
7547 if (GET_CODE (disp) == CONST_INT)
7549 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7551 if (USER_LABEL_PREFIX[0] == 0)
7553 fputs ("ds:", file);
7555 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7558 output_pic_addr_const (file, disp, 0);
7560 output_addr_const (file, disp);
7562 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7564 && ((GET_CODE (disp) == SYMBOL_REF
7565 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7566 || GET_CODE (disp) == LABEL_REF
7567 || (GET_CODE (disp) == CONST
7568 && GET_CODE (XEXP (disp, 0)) == PLUS
7569 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7570 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7571 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7572 fputs ("(%rip)", file);
/* AT&T syntax: "disp(base,index,scale)".  */
7576 if (ASSEMBLER_DIALECT == ASM_ATT)
7581 output_pic_addr_const (file, disp, 0);
7582 else if (GET_CODE (disp) == LABEL_REF)
7583 output_asm_label (disp);
7585 output_addr_const (file, disp);
7590 PRINT_REG (base, 0, file);
7594 PRINT_REG (index, 0, file);
7596 fprintf (file, ",%d", scale);
/* Intel syntax: "[base+index*scale+disp]", with the symbolic part of
   the displacement pulled out and printed before the brackets.  */
7602 rtx offset = NULL_RTX;
7606 /* Pull out the offset of a symbol; print any symbol itself. */
7607 if (GET_CODE (disp) == CONST
7608 && GET_CODE (XEXP (disp, 0)) == PLUS
7609 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7611 offset = XEXP (XEXP (disp, 0), 1);
7612 disp = gen_rtx_CONST (VOIDmode,
7613 XEXP (XEXP (disp, 0), 0));
7617 output_pic_addr_const (file, disp, 0);
7618 else if (GET_CODE (disp) == LABEL_REF)
7619 output_asm_label (disp);
7620 else if (GET_CODE (disp) == CONST_INT)
7623 output_addr_const (file, disp);
7629 PRINT_REG (base, 0, file);
7632 if (INTVAL (offset) >= 0)
7634 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7638 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7645 PRINT_REG (index, 0, file);
7647 fprintf (file, "*%d", scale);
/* Print an UNSPEC address constant X (TLS relocations) to FILE.
   Each case emits the symbol followed by the assembler relocation
   suffix (@GOTTPOFF, @TPOFF, @NTPOFF, @DTPOFF, @GOTNTPOFF, @INDNTPOFF).
   NOTE(review): extract is non-contiguous; returns, breaks and some
   case labels are elided between the numbered lines.  */
7655 output_addr_const_extra (FILE *file, rtx x)
7659 if (GET_CODE (x) != UNSPEC)
7662 op = XVECEXP (x, 0, 0);
7663 switch (XINT (x, 1))
7665 case UNSPEC_GOTTPOFF:
7666 output_addr_const (file, op);
7667 /* FIXME: This might be @TPOFF in Sun ld. */
7668 fputs ("@GOTTPOFF", file);
7671 output_addr_const (file, op);
7672 fputs ("@TPOFF", file);
7675 output_addr_const (file, op);
/* Two suffixes follow: which is chosen presumably depends on a target
   test elided from this extract -- TODO confirm against full source.  */
7677 fputs ("@TPOFF", file);
7679 fputs ("@NTPOFF", file);
7682 output_addr_const (file, op);
7683 fputs ("@DTPOFF", file);
7685 case UNSPEC_GOTNTPOFF:
7686 output_addr_const (file, op);
/* 64-bit form uses RIP-relative @GOTTPOFF; 32-bit uses @GOTNTPOFF.
   The selecting condition is elided -- verify in full source.  */
7688 fputs ("@GOTTPOFF(%rip)", file);
7690 fputs ("@GOTNTPOFF", file);
7692 case UNSPEC_INDNTPOFF:
7693 output_addr_const (file, op);
7694 fputs ("@INDNTPOFF", file);
7704 /* Split one or more DImode RTL references into pairs of SImode
7705 references. The RTL can be REG, offsettable MEM, integer constant, or
7706 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7707 split and "num" is its length. lo_half and hi_half are output arrays
7708 that parallel "operands". */
/* NOTE(review): loop header and braces elided in this extract;
   the visible body handles one operand at index NUM.  */
7711 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7715 rtx op = operands[num];
7717 /* simplify_subreg refuse to split volatile memory addresses,
7718 but we still have to handle it. */
/* MEM: split by address adjustment (low word at offset 0, high at 4).  */
7719 if (GET_CODE (op) == MEM)
7721 lo_half[num] = adjust_address (op, SImode, 0);
7722 hi_half[num] = adjust_address (op, SImode, 4);
/* Otherwise use subregs; VOIDmode constants are treated as DImode.  */
7726 lo_half[num] = simplify_gen_subreg (SImode, op,
7727 GET_MODE (op) == VOIDmode
7728 ? DImode : GET_MODE (op), 0);
7729 hi_half[num] = simplify_gen_subreg (SImode, op,
7730 GET_MODE (op) == VOIDmode
7731 ? DImode : GET_MODE (op), 4);
7735 /* Split one or more TImode RTL references into pairs of DImode
7736 references. The RTL can be REG, offsettable MEM, integer constant, or
7737 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7738 split and "num" is its length. lo_half and hi_half are output arrays
7739 that parallel "operands". */
/* (Comment above corrected: the code below produces DImode halves from
   TImode operands; the old text said SImode/DImode, a copy-paste from
   split_di.)  NOTE(review): loop header and braces elided in extract.  */
7742 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7746 rtx op = operands[num];
7748 /* simplify_subreg refuse to split volatile memory addresses, but we
7749 still have to handle it. */
/* MEM: low double-word at offset 0, high double-word at offset 8.  */
7750 if (GET_CODE (op) == MEM)
7752 lo_half[num] = adjust_address (op, DImode, 0);
7753 hi_half[num] = adjust_address (op, DImode, 8);
7757 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7758 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7763 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7764 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7765 is the expression of the binary operation. The output may either be
7766 emitted here, or returned to the caller, like all output_* functions.
7768 There is no guarantee that the operands are the same mode, as they
7769 might be within FLOAT or FLOAT_EXTEND expressions. */
7771 #ifndef SYSV386_COMPAT
7772 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7773 wants to fix the assemblers because that causes incompatibility
7774 with gcc. No-one wants to fix gcc because that causes
7775 incompatibility with assemblers... You can use the option of
7776 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7777 #define SYSV386_COMPAT 1
/* NOTE(review): this extract is heavily sampled -- mnemonic selection
   (fadd/fsub/fmul/fdiv strcpy into BUF), break/return statements and
   many braces are elided.  Do not infer control flow from adjacency.  */
7781 output_387_binary_op (rtx insn, rtx *operands)
7783 static char buf[30];
/* True when any operand lives in an SSE register: emit SSE scalar ops.  */
7786 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7788 #ifdef ENABLE_CHECKING
7789 /* Even if we do not want to check the inputs, this documents input
7790 constraints. Which helps in understanding the following code. */
7791 if (STACK_REG_P (operands[0])
7792 && ((REG_P (operands[1])
7793 && REGNO (operands[0]) == REGNO (operands[1])
7794 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7795 || (REG_P (operands[2])
7796 && REGNO (operands[0]) == REGNO (operands[2])
7797 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7798 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* First switch: pick the base mnemonic; integer-mode operands select
   the fi* (integer) forms.  The strcpy calls are elided here.  */
7804 switch (GET_CODE (operands[3]))
7807 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7808 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7816 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7817 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7825 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7826 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7834 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7835 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: append ss/sd suffix by mode and return the template.  */
7849 if (GET_MODE (operands[0]) == SFmode)
7850 strcat (buf, "ss\t{%2, %0|%0, %2}");
7852 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose operand suffix P by commutativity, stack position,
   and whether an operand dies (pop forms).  */
7857 switch (GET_CODE (operands[3]))
/* Commutative MULT/PLUS: canonicalize so operands[0] == operands[1].  */
7861 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7863 rtx temp = operands[2];
7864 operands[2] = operands[1];
7868 /* know operands[0] == operands[1]. */
7870 if (GET_CODE (operands[2]) == MEM)
7876 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7878 if (STACK_TOP_P (operands[0]))
7879 /* How is it that we are storing to a dead operand[2]?
7880 Well, presumably operands[1] is dead too. We can't
7881 store the result to st(0) as st(0) gets popped on this
7882 instruction. Instead store to operands[2] (which I
7883 think has to be st(1)). st(1) will be popped later.
7884 gcc <= 2.8.1 didn't have this check and generated
7885 assembly code that the Unixware assembler rejected. */
7886 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7888 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7892 if (STACK_TOP_P (operands[0]))
7893 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7895 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative MINUS/DIV cases follow.  */
7900 if (GET_CODE (operands[1]) == MEM)
7906 if (GET_CODE (operands[2]) == MEM)
7912 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7915 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7916 derived assemblers, confusingly reverse the direction of
7917 the operation for fsub{r} and fdiv{r} when the
7918 destination register is not st(0). The Intel assembler
7919 doesn't have this brain damage. Read !SYSV386_COMPAT to
7920 figure out what the hardware really does. */
7921 if (STACK_TOP_P (operands[0]))
7922 p = "{p\t%0, %2|rp\t%2, %0}";
7924 p = "{rp\t%2, %0|p\t%0, %2}";
7926 if (STACK_TOP_P (operands[0]))
7927 /* As above for fmul/fadd, we can't store to st(0). */
7928 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7930 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7935 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7938 if (STACK_TOP_P (operands[0]))
7939 p = "{rp\t%0, %1|p\t%1, %0}";
7941 p = "{p\t%1, %0|rp\t%0, %1}";
7943 if (STACK_TOP_P (operands[0]))
7944 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7946 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7951 if (STACK_TOP_P (operands[0]))
7953 if (STACK_TOP_P (operands[1]))
7954 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7956 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7959 else if (STACK_TOP_P (operands[1]))
7962 p = "{\t%1, %0|r\t%0, %1}";
7964 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7970 p = "{r\t%2, %0|\t%0, %2}";
7972 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7985 /* Output code to initialize control word copies used by
7986 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7987 is set to control word rounding downwards. */
/* NOTE(review): the predicate completing the condition at 7995 is
   elided in this extract; both branches OR rounding bits (0xc00, i.e.
   round-toward-zero) into the saved control word.  */
7989 emit_i387_cw_initialization (rtx normal, rtx round_down)
7991 rtx reg = gen_reg_rtx (HImode);
/* fnstcw stores the current x87 control word into NORMAL.  */
7993 emit_insn (gen_x86_fnstcw_1 (normal));
7994 emit_move_insn (reg, normal);
7995 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
/* Insert 0xc into bits 10-11 via insv to avoid a partial-reg stall...  */
7997 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
/* ...otherwise just OR in 0xc00 directly.  */
7999 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8000 emit_move_insn (round_down, reg);
8003 /* Output code for INSN to convert a float to a signed int. OPERANDS
8004 are the insn operands. The output may be [HSD]Imode and the input
8005 operand may be [SDX]Fmode. */
/* NOTE(review): abort/assert lines for the two sanity checks are
   elided in this extract.  */
8008 output_fix_trunc (rtx insn, rtx *operands)
8010 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8011 int dimode_p = GET_MODE (operands[0]) == DImode;
8013 /* Jump through a hoop or two for DImode, since the hardware has no
8014 non-popping instruction. We used to do this a different way, but
8015 that was somewhat fragile and broke with post-reload splitters. */
8016 if (dimode_p && !stack_top_dies)
8017 output_asm_insn ("fld\t%y1", operands);
/* Sanity: input must be at the top of the x87 stack.  */
8019 if (!STACK_TOP_P (operands[1]))
/* Sanity: output must be a memory operand.  */
8022 if (GET_CODE (operands[0]) != MEM)
/* Switch to truncation rounding (%3), store, then restore (%2).  */
8025 output_asm_insn ("fldcw\t%3", operands);
8026 if (stack_top_dies || dimode_p)
8027 output_asm_insn ("fistp%z0\t%0", operands);
8029 output_asm_insn ("fist%z0\t%0", operands);
8030 output_asm_insn ("fldcw\t%2", operands);
8035 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8036 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8037 when fucom should be used. */
/* NOTE(review): sampled extract -- several returns, braces and table
   entries are elided; the ALT table below is only partially visible.  */
8040 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8043 rtx cmp_op0 = operands[0];
8044 rtx cmp_op1 = operands[1];
8045 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8050 cmp_op1 = operands[2];
/* SSE path: [u]comiss / [u]comisd by mode and orderedness.  */
8054 if (GET_MODE (operands[0]) == SFmode)
8056 return "ucomiss\t{%1, %0|%0, %1}";
8058 return "comiss\t{%1, %0|%0, %1}";
8061 return "ucomisd\t{%1, %0|%0, %1}";
8063 return "comisd\t{%1, %0|%0, %1}";
/* x87 path: first operand must be st(0).  */
8066 if (! STACK_TOP_P (cmp_op0))
8069 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8071 if (STACK_REG_P (cmp_op1)
8073 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8074 && REGNO (cmp_op1) != FIRST_STACK_REG)
8076 /* If both the top of the 387 stack dies, and the other operand
8077 is also a stack register that dies, then this must be a
8078 `fcompp' float compare */
8082 /* There is no double popping fcomi variant. Fortunately,
8083 eflags is immune from the fstp's cc clobbering. */
8085 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8087 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8095 return "fucompp\n\tfnstsw\t%0";
8097 return "fcompp\n\tfnstsw\t%0";
8110 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8112 static const char * const alt[24] =
8124 "fcomi\t{%y1, %0|%0, %y1}",
8125 "fcomip\t{%y1, %0|%0, %y1}",
8126 "fucomi\t{%y1, %0|%0, %y1}",
8127 "fucomip\t{%y1, %0|%0, %y1}",
8134 "fcom%z2\t%y2\n\tfnstsw\t%0",
8135 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8136 "fucom%z2\t%y2\n\tfnstsw\t%0",
8137 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8139 "ficom%z2\t%y2\n\tfnstsw\t%0",
8140 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 5-bit index into ALT from the four selector flags.  */
8148 mask = eflags_p << 3;
8149 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8150 mask |= unordered_p << 1;
8151 mask |= stack_top_dies;
/* Emit one element of a jump-table address vector: "<directive> L<value>".
   Uses ASM_QUAD instead of ASM_LONG when the condition (elided here,
   presumably 64-bit target -- TODO confirm) holds.  */
8164 ix86_output_addr_vec_elt (FILE *file, int value)
8166 const char *directive = ASM_LONG;
8171 directive = ASM_QUAD;
8177 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC jump-table as a difference of labels
   (L<value> - L<rel>), or as an @GOTOFF/GOT-relative expression
   depending on target support.  NOTE(review): the conditions selecting
   between the branches are partially elided in this extract.  */
8181 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8184 fprintf (file, "%s%s%d-%s%d\n",
8185 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8186 else if (HAVE_AS_GOTOFF_IN_DATA)
8187 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8189 else if (TARGET_MACHO)
/* Darwin: emit "L<value> - <picbase>" using the function's pic base.  */
8191 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8192 machopic_output_function_base_name (file);
8193 fprintf(file, "\n");
/* Fallback: express relative to the GOT symbol.  */
8197 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8198 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8201 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* (Comment continues in the elided lines.)  Emits RTL to zero DEST.  */
8205 ix86_expand_clear (rtx dest)
8209 /* We play register width games, which are only valid after reload. */
8210 if (!reload_completed)
8213 /* Avoid HImode and its attendant prefix byte. */
8214 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8215 dest = gen_rtx_REG (SImode, REGNO (dest));
8217 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8219 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8220 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* xor form clobbers the flags register (hard reg 17).  */
8222 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8223 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8229 /* X is an unchanging MEM. If it is a constant pool reference, return
8230 the constant pool rtx, else NULL. */
8233 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT wrapping from the address before testing it.  */
8235 x = ix86_delegitimize_address (XEXP (x, 0));
8237 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8238 return get_pool_constant (x);
/* Expand a move of OPERANDS[1] into OPERANDS[0] in MODE, legitimizing
   TLS, PIC and constant operands as needed before emitting the SET.
   NOTE(review): sampled extract -- operand assignments (op0/op1),
   braces and several else-arms are elided between numbered lines.  */
8244 ix86_expand_move (enum machine_mode mode, rtx operands[])
8246 int strict = (reload_in_progress || reload_completed);
8248 enum tls_model model;
/* TLS symbols: rewrite through the appropriate access model.  */
8253 model = tls_symbolic_operand (op1, Pmode);
8256 op1 = legitimize_tls_address (op1, model, true);
8257 op1 = force_operand (op1, op0);
/* PIC: legitimize symbolic addresses (Darwin has its own path).  */
8262 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8267 rtx temp = ((reload_in_progress
8268 || ((op0 && GET_CODE (op0) == REG)
8270 ? op0 : gen_reg_rtx (Pmode));
8271 op1 = machopic_indirect_data_reference (op1, temp);
8272 op1 = machopic_legitimize_pic_address (op1, mode,
8273 temp == op1 ? 0 : temp);
8275 else if (MACHOPIC_INDIRECT)
8276 op1 = machopic_indirect_data_reference (op1, 0);
8280 if (GET_CODE (op0) == MEM)
8281 op1 = force_reg (Pmode, op1);
8285 if (GET_CODE (temp) != REG)
8286 temp = gen_reg_rtx (Pmode);
8287 temp = legitimize_pic_address (op1, temp);
8292 #endif /* TARGET_MACHO */
/* Never emit mem-to-mem moves (except genuine pushes).  */
8296 if (GET_CODE (op0) == MEM
8297 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8298 || !push_operand (op0, mode))
8299 && GET_CODE (op1) == MEM)
8300 op1 = force_reg (mode, op1);
8302 if (push_operand (op0, mode)
8303 && ! general_no_elim_operand (op1, mode))
8304 op1 = copy_to_mode_reg (mode, op1);
8306 /* Force large constants in 64bit compilation into register
8307 to get them CSEed. */
8308 if (TARGET_64BIT && mode == DImode
8309 && immediate_operand (op1, mode)
8310 && !x86_64_zero_extended_value (op1)
8311 && !register_operand (op0, mode)
8312 && optimize && !reload_completed && !reload_in_progress)
8313 op1 = copy_to_mode_reg (mode, op1);
8315 if (FLOAT_MODE_P (mode))
8317 /* If we are loading a floating point constant to a register,
8318 force the value to memory now, since we'll get better code
8319 out the back end. */
8323 else if (GET_CODE (op1) == CONST_DOUBLE)
8325 op1 = validize_mem (force_const_mem (mode, op1));
8326 if (!register_operand (op0, mode))
/* Mem destination: bounce the pool constant through a register.  */
8328 rtx temp = gen_reg_rtx (mode);
8329 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8330 emit_move_insn (op0, temp);
8337 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move, forcing awkward constants to memory and
   avoiding mem-to-mem moves.  NOTE(review): part of the condition at
   8352 is elided in this extract.  */
8341 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8343 /* Force constants other than zero into memory. We do not know how
8344 the instructions used to build constants modify the upper 64 bits
8345 of the register, once we have that information we may be able
8346 to handle some of them more efficiently. */
8347 if ((reload_in_progress | reload_completed) == 0
8348 && register_operand (operands[0], mode)
8349 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8350 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8352 /* Make operand1 a register if it isn't already. */
8354 && !register_operand (operands[0], mode)
8355 && !register_operand (operands[1], mode)
8357 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8358 emit_move_insn (operands[0], temp);
8362 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8365 /* Attempt to expand a binary operator. Make the expansion closer to the
8366 actual machine, then just general_operand, which will allow 3 separate
8367 memory references (one output, two input) in a single insn. */
/* NOTE(review): initial dst/src1/src2 assignments and some braces are
   elided in this extract.  */
8370 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8373 int matching_memory;
8374 rtx src1, src2, dst, op, clob;
8380 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8381 if (GET_RTX_CLASS (code) == 'c'
8382 && (rtx_equal_p (dst, src2)
8383 || immediate_operand (src1, mode)))
8390 /* If the destination is memory, and we do not have matching source
8391 operands, do things in registers. */
8392 matching_memory = 0;
8393 if (GET_CODE (dst) == MEM)
8395 if (rtx_equal_p (dst, src1))
8396 matching_memory = 1;
8397 else if (GET_RTX_CLASS (code) == 'c'
8398 && rtx_equal_p (dst, src2))
8399 matching_memory = 2;
8401 dst = gen_reg_rtx (mode);
8404 /* Both source operands cannot be in memory. */
8405 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8407 if (matching_memory != 2)
8408 src2 = force_reg (mode, src2);
8410 src1 = force_reg (mode, src1);
8413 /* If the operation is not commutable, source 1 cannot be a constant
8414 or non-matching memory. */
8415 if ((CONSTANT_P (src1)
8416 || (!matching_memory && GET_CODE (src1) == MEM))
8417 && GET_RTX_CLASS (code) != 'c')
8418 src1 = force_reg (mode, src1);
8420 /* If optimizing, copy to regs to improve CSE */
8421 if (optimize && ! no_new_pseudos)
8423 if (GET_CODE (dst) == MEM)
8424 dst = gen_reg_rtx (mode);
8425 if (GET_CODE (src1) == MEM)
8426 src1 = force_reg (mode, src1);
8427 if (GET_CODE (src2) == MEM)
8428 src2 = force_reg (mode, src2);
8431 /* Emit the instruction. */
8433 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8434 if (reload_in_progress)
8436 /* Reload doesn't know about the flags register, and doesn't know that
8437 it doesn't want to clobber it. We can only do this with PLUS. */
/* Otherwise wrap the SET with a flags-register clobber.  */
8444 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8445 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8448 /* Fix up the destination if needed. */
8449 if (dst != operands[0])
8450 emit_move_insn (operands[0], dst);
8453 /* Return TRUE or FALSE depending on whether the binary operator meets the
8454 appropriate constraints. */
/* NOTE(review): the return statements between the checks are elided in
   this extract; each visible test is a reject condition.  */
8457 ix86_binary_operator_ok (enum rtx_code code,
8458 enum machine_mode mode ATTRIBUTE_UNUSED,
8461 /* Both source operands cannot be in memory. */
8462 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8464 /* If the operation is not commutable, source 1 cannot be a constant. */
8465 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8467 /* If the destination is memory, we must have a matching source operand. */
8468 if (GET_CODE (operands[0]) == MEM
8469 && ! (rtx_equal_p (operands[0], operands[1])
8470 || (GET_RTX_CLASS (code) == 'c'
8471 && rtx_equal_p (operands[0], operands[2]))))
8473 /* If the operation is not commutable and the source 1 is memory, we must
8474 have a matching destination. */
8475 if (GET_CODE (operands[1]) == MEM
8476 && GET_RTX_CLASS (code) != 'c'
8477 && ! rtx_equal_p (operands[0], operands[1]))
8482 /* Attempt to expand a unary operator. Make the expansion closer to the
8483 actual machine, then just general_operand, which will allow 2 separate
8484 memory references (one output, one input) in a single insn. */
/* NOTE(review): initial dst/src assignments and some braces are elided
   in this extract.  Mirrors ix86_expand_binary_operator above.  */
8487 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8490 int matching_memory;
8491 rtx src, dst, op, clob;
8496 /* If the destination is memory, and we do not have matching source
8497 operands, do things in registers. */
8498 matching_memory = 0;
8499 if (GET_CODE (dst) == MEM)
8501 if (rtx_equal_p (dst, src))
8502 matching_memory = 1;
8504 dst = gen_reg_rtx (mode);
8507 /* When source operand is memory, destination must match. */
8508 if (!matching_memory && GET_CODE (src) == MEM)
8509 src = force_reg (mode, src);
8511 /* If optimizing, copy to regs to improve CSE */
8512 if (optimize && ! no_new_pseudos)
8514 if (GET_CODE (dst) == MEM)
8515 dst = gen_reg_rtx (mode);
8516 if (GET_CODE (src) == MEM)
8517 src = force_reg (mode, src);
8520 /* Emit the instruction. */
8522 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8523 if (reload_in_progress || code == NOT)
8525 /* Reload doesn't know about the flags register, and doesn't know that
8526 it doesn't want to clobber it. */
/* Otherwise clobber the flags register alongside the SET.  */
8533 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8534 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8537 /* Fix up the destination if needed. */
8538 if (dst != operands[0])
8539 emit_move_insn (operands[0], dst);
8542 /* Return TRUE or FALSE depending on whether the unary operator meets the
8543 appropriate constraints. */
/* NOTE(review): return statements elided in this extract.  */
8546 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8547 enum machine_mode mode ATTRIBUTE_UNUSED,
8548 rtx operands[2] ATTRIBUTE_UNUSED)
8550 /* If one of operands is memory, source and destination must match. */
8551 if ((GET_CODE (operands[0]) == MEM
8552 || GET_CODE (operands[1]) == MEM)
8553 && ! rtx_equal_p (operands[0], operands[1]))
8558 /* Return TRUE or FALSE depending on whether the first SET in INSN
8559 has source and destination with matching CC modes, and that the
8560 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): the case labels of the switch over SET_MODE and the
   early returns are elided in this extract.  */
8563 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8566 enum machine_mode set_mode;
8568 set = PATTERN (insn);
8569 if (GET_CODE (set) == PARALLEL)
8570 set = XVECEXP (set, 0, 0);
8571 if (GET_CODE (set) != SET)
8573 if (GET_CODE (SET_SRC (set)) != COMPARE)
8576 set_mode = GET_MODE (SET_DEST (set));
/* Per-mode compatibility checks (switch cases elided).  */
8580 if (req_mode != CCNOmode
8581 && (req_mode != CCmode
8582 || XEXP (SET_SRC (set), 1) != const0_rtx))
8586 if (req_mode == CCGCmode)
8590 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8594 if (req_mode == CCZmode)
8604 return (GET_MODE (SET_SRC (set)) == set_mode);
8607 /* Generate insn patterns to do an integer compare of OPERANDS. */
8610 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8612 enum machine_mode cmpmode;
8615 cmpmode = SELECT_CC_MODE (code, op0, op1);
8616 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8618 /* This is very simple, but making the interface the same as in the
8619 FP case makes the rest of the code easier. */
/* Emit flags = COMPARE (op0, op1).  */
8620 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8621 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8623 /* Return the test that should be put into the flags user, i.e.
8624 the bcc, scc, or cmov instruction. */
8625 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8628 /* Figure out whether to use ordered or unordered fp comparisons.
8629 Return the appropriate mode to use. */
8632 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8634 /* ??? In order to make all comparisons reversible, we do all comparisons
8635 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8636 all forms trapping and nontrapping comparisons, we can make inequality
8637 comparisons trapping again, since it results in better code when using
8638 FCOM based compares. */
8639 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode for comparing OP0 with OP1 under CODE.
   FP modes delegate to ix86_fp_compare_mode; integer modes pick the
   least-constrained CC mode that still captures the needed flags.
   NOTE(review): the returned mode constants between cases are elided
   in this extract.  */
8643 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8645 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8646 return ix86_fp_compare_mode (code);
8649 /* Only zero flag is needed. */
8651 case NE: /* ZF!=0 */
8653 /* Codes needing carry flag. */
8654 case GEU: /* CF=0 */
8655 case GTU: /* CF=0 & ZF=0 */
8656 case LTU: /* CF=1 */
8657 case LEU: /* CF=1 | ZF=1 */
8659 /* Codes possibly doable only with sign flag when
8660 comparing against zero. */
8661 case GE: /* SF=OF or SF=0 */
8662 case LT: /* SF<>OF or SF=1 */
8663 if (op1 == const0_rtx)
8666 /* For other cases Carry flag is not required. */
8668 /* Codes doable only with sign flag when comparing
8669 against zero, but we miss jump instruction for it
8670 so we need to use relational tests against overflow
8671 that thus needs to be zero. */
8672 case GT: /* ZF=0 & SF=OF */
8673 case LE: /* ZF=1 | SF<>OF */
8674 if (op1 == const0_rtx)
8678 /* strcmp pattern do (use flags) and combine may ask us for proper
8687 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8690 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8692 enum rtx_code swapped_code = swap_condition (code);
/* Use fcomi when it is the cheapest strategy for CODE or its swap.  */
8693 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8694 || (ix86_fp_comparison_cost (swapped_code)
8695 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8698 /* Swap, force into registers, or otherwise massage the two operands
8699 to a fp comparison. The operands are updated in place; the new
8700 comparison code is returned. */
/* NOTE(review): braces, the write-back of *pop0/*pop1 and some else-arms
   are elided in this extract.  */
8702 static enum rtx_code
8703 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8705 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8706 rtx op0 = *pop0, op1 = *pop1;
8707 enum machine_mode op_mode = GET_MODE (op0);
8708 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8710 /* All of the unordered compare instructions only work on registers.
8711 The same is true of the XFmode compare instructions. The same is
8712 true of the fcomi compare instructions. */
8715 && (fpcmp_mode == CCFPUmode
8716 || op_mode == XFmode
8717 || ix86_use_fcomi_compare (code)))
8719 op0 = force_reg (op_mode, op0);
8720 op1 = force_reg (op_mode, op1);
8724 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8725 things around if they appear profitable, otherwise force op0
/* (Comment continues in elided lines: "...into a register.")  */
8728 if (standard_80387_constant_p (op0) == 0
8729 || (GET_CODE (op0) == MEM
8730 && ! (standard_80387_constant_p (op1) == 0
8731 || GET_CODE (op1) == MEM)))
8734 tmp = op0, op0 = op1, op1 = tmp;
8735 code = swap_condition (code);
8738 if (GET_CODE (op0) != REG)
8739 op0 = force_reg (op_mode, op0);
8741 if (CONSTANT_P (op1))
/* Loadable x87 constants (0.0/1.0) go in a register; others to the
   constant pool.  */
8743 if (standard_80387_constant_p (op1))
8744 op1 = force_reg (op_mode, op1);
8746 op1 = validize_mem (force_const_mem (op_mode, op1));
8750 /* Try to rearrange the comparison to make it cheaper. */
8751 if (ix86_fp_comparison_cost (code)
8752 > ix86_fp_comparison_cost (swap_condition (code))
8753 && (GET_CODE (op1) == REG || !no_new_pseudos))
8756 tmp = op0, op0 = op1, op1 = tmp;
8757 code = swap_condition (code);
8758 if (GET_CODE (op0) != REG)
8759 op0 = force_reg (op_mode, op0);
8767 /* Convert comparison codes we use to represent FP comparison to integer
8768 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the entire function body (a switch over CODE) is
   elided from this extract; only the signature is visible.  */
8770 static enum rtx_code
8771 ix86_fp_compare_code_to_integer (enum rtx_code code)
8800 /* Split comparison code CODE into comparisons we can do using branch
8801 instructions. BYPASS_CODE is comparison code for branch that will
8802 branch around FIRST_CODE and SECOND_CODE. If some of branches
8803 is not required, set value to NIL.
8804 We never require more than two branches. */
/* NOTE(review): *first_code assignments, breaks, and the default case
   are elided in this extract.  */
8806 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8807 enum rtx_code *first_code,
8808 enum rtx_code *second_code)
8814 /* The fcomi comparison sets flags as follows:
/* (Flag table elided.)  Cases needing a single branch:  */
8824 case GT: /* GTU - CF=0 & ZF=0 */
8825 case GE: /* GEU - CF=0 */
8826 case ORDERED: /* PF=0 */
8827 case UNORDERED: /* PF=1 */
8828 case UNEQ: /* EQ - ZF=1 */
8829 case UNLT: /* LTU - CF=1 */
8830 case UNLE: /* LEU - CF=1 | ZF=1 */
8831 case LTGT: /* EQ - ZF=0 */
/* Cases needing an unordered bypass or follow-up branch:  */
8833 case LT: /* LTU - CF=1 - fails on unordered */
8835 *bypass_code = UNORDERED;
8837 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8839 *bypass_code = UNORDERED;
8841 case EQ: /* EQ - ZF=1 - fails on unordered */
8843 *bypass_code = UNORDERED;
8845 case NE: /* NE - ZF=0 - fails on unordered */
8847 *second_code = UNORDERED;
8849 case UNGE: /* GEU - CF=0 - fails on unordered */
8851 *second_code = UNORDERED;
8853 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8855 *second_code = UNORDERED;
/* Non-IEEE targets drop the extra unordered branches.  */
8860 if (!TARGET_IEEE_FP)
8867 /* Return cost of comparison done fcom + arithmetics operations on AX.
8868 All following functions do use number of instructions as a cost metrics.
8869 In future this should be tweaked to compute bytes for optimize_size and
8870 take into account performance of various instructions on various CPUs. */
8872 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8874 if (!TARGET_IEEE_FP)
8876 /* The cost of code output by ix86_expand_fp_compare. */
/* NOTE(review): the per-code switch returning the costs is elided.  */
8904 /* Return cost of comparison done using fcomi operation.
8905 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8907 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8909 enum rtx_code bypass_code, first_code, second_code;
8910 /* Return arbitrarily high cost when instruction is not supported - this
8911 prevents gcc from using it. */
/* (The TARGET_CMOVE guard returning the high cost is elided.)  */
8914 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Cost: the fcomi + branch, plus one for any extra bypass/second branch.  */
8915 return (bypass_code != NIL || second_code != NIL) + 2;
8918 /* Return cost of comparison done using sahf operation.
8919 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8921 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8923 enum rtx_code bypass_code, first_code, second_code;
8924 /* Return arbitrarily high cost when instruction is not preferred - this
8925 avoids gcc from using it. */
8926 if (!TARGET_USE_SAHF && !optimize_size)
8928 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* fnstsw + sahf + branch, plus one for any extra branch.  */
8929 return (bypass_code != NIL || second_code != NIL) + 3;
8932 /* Compute cost of the comparison done using any method.
8933 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8935 ix86_fp_comparison_cost (enum rtx_code code)
8937 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8940 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8941 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* MIN is the cheapest of the three strategies (updates elided).  */
8943 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8944 if (min > sahf_cost)
8946 if (min > fcomi_cost)
8951 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* NOTE(review): the numeric prefix on each line is the original source line
   number; the numbers are non-contiguous, so this listing is elided and some
   statements/braces are not shown.  Comments below describe visible code only.
   Returns the flags-user rtx (for bcc/scc/cmov).  SECOND_TEST/BYPASS_TEST
   receive extra tests needed by some FP comparisons, or NULL_RTX.  */
8954 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8955 rtx *second_test, rtx *bypass_test)
8957 enum machine_mode fpcmp_mode, intcmp_mode;
8959 int cost = ix86_fp_comparison_cost (code);
8960 enum rtx_code bypass_code, first_code, second_code;
/* Select the CC mode for this FP comparison and canonicalize operands.  */
8962 fpcmp_mode = ix86_fp_compare_mode (code);
8963 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
/* Clear caller out-parameters (NULL guards are elided in this listing).  */
8966 *second_test = NULL_RTX;
8968 *bypass_test = NULL_RTX;
8970 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8972 /* Do fcomi/sahf based test when profitable. */
8973 if ((bypass_code == NIL || bypass_test)
8974 && (second_code == NIL || second_test)
8975 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi path: compare straight into the FLAGS register.  */
8979 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8980 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw into a scratch HImode reg, then sahf to load FLAGS.  */
8986 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8987 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8989 scratch = gen_reg_rtx (HImode);
8990 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8991 emit_insn (gen_x86_sahf_1 (scratch));
8994 /* The FP codes work out to act like unsigned. */
8995 intcmp_mode = fpcmp_mode;
8997 if (bypass_code != NIL)
8998 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8999 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9001 if (second_code != NIL)
9002 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9003 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9008 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9009 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9010 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9012 scratch = gen_reg_rtx (HImode);
9013 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9015 /* In the unordered case, we have to check C2 for NaN's, which
9016 doesn't happen to work out to anything nice combination-wise.
9017 So do some bit twiddling on the value we've got in AH to come
9018 up with an appropriate set of condition codes. */
9020 intcmp_mode = CCNOmode;
/* Per-code bit tests on the x87 status word copied into AH.  The masks
   appear to be the x87 condition bits (0x01=C0, 0x04=C2, 0x40=C3,
   0x45=C0|C2|C3) -- verify against the FPU status word layout.  */
9025 if (code == GT || !TARGET_IEEE_FP)
9027 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9032 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9033 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9034 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9035 intcmp_mode = CCmode;
9041 if (code == LT && TARGET_IEEE_FP)
9043 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9044 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9045 intcmp_mode = CCmode;
9050 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9056 if (code == GE || !TARGET_IEEE_FP)
9058 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9063 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9064 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9071 if (code == LE && TARGET_IEEE_FP)
9073 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9074 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9075 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9076 intcmp_mode = CCmode;
9081 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9087 if (code == EQ && TARGET_IEEE_FP)
9089 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9090 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9091 intcmp_mode = CCmode;
9096 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9103 if (code == NE && TARGET_IEEE_FP)
9105 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9106 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9112 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9118 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9122 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9131 /* Return the test that should be put into the flags user, i.e.
9132 the bcc, scc, or cmov instruction. */
9133 return gen_rtx_fmt_ee (code, VOIDmode,
9134 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of ix86_compare_op0/ix86_compare_op1 under CODE and
   return the flags-user rtx.  FP-mode operands go through
   ix86_expand_fp_compare; integer modes through ix86_expand_int_compare.
   (Elided listing: the numeric prefixes are original line numbers and
   some lines -- e.g. the NULL guards on the out-params -- are missing.)  */
9139 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9142 op0 = ix86_compare_op0;
9143 op1 = ix86_compare_op1;
/* Clear the caller's extra-test slots before expansion.  */
9146 *second_test = NULL_RTX;
9148 *bypass_test = NULL_RTX;
9150 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9151 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9152 second_test, bypass_test);
9154 ret = ix86_expand_int_compare (code, op0, op1);
9159 /* Return true if the CODE will result in nontrivial jump sequence. */
/* A sequence is nontrivial when the FP comparison needs either a bypass
   test or a second test in addition to the primary jump.  */
9161 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9163 enum rtx_code bypass_code, first_code, second_code;
9166 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9167 return bypass_code != NIL || second_code != NIL;
/* Emit a conditional branch to LABEL for comparison CODE of the global
   ix86_compare_op0/op1, dispatching on the operands' machine mode.
   (Elided listing: the numeric prefixes are original line numbers; switch
   case labels and several braces are not visible.)  */
9171 ix86_expand_branch (enum rtx_code code, rtx label)
9175 switch (GET_MODE (ix86_compare_op0))
/* Simple integer case: expand the compare and emit one jump.  */
9181 tmp = ix86_expand_compare (code, NULL, NULL);
9182 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9183 gen_rtx_LABEL_REF (VOIDmode, label),
9185 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating point case.  */
9194 enum rtx_code bypass_code, first_code, second_code;
9196 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9199 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9201 /* Check whether we will use the natural sequence with one jump. If
9202 so, we can expand jump early. Otherwise delay expansion by
9203 creating compound insn to not confuse optimizers. */
9204 if (bypass_code == NIL && second_code == NIL
9207 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9208 gen_rtx_LABEL_REF (VOIDmode, label),
/* Delayed expansion: build a PARALLEL [jump; clobbers] for later split.  */
9213 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9214 ix86_compare_op0, ix86_compare_op1);
9215 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9216 gen_rtx_LABEL_REF (VOIDmode, label),
9218 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9220 use_fcomi = ix86_use_fcomi_compare (code);
9221 vec = rtvec_alloc (3 + !use_fcomi);
9222 RTVEC_ELT (vec, 0) = tmp;
/* Hard regs 17/18 look like FLAGS/FPSR clobbers -- confirm against
   the i386 register numbering in this GCC version.  */
9224 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9226 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9229 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9231 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9239 /* Expand DImode branch into multiple compare+branch. */
9241 rtx lo[2], hi[2], label2;
9242 enum rtx_code code1, code2, code3;
/* Canonicalize: keep any constant in op1 so the splits below work.  */
9244 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9246 tmp = ix86_compare_op0;
9247 ix86_compare_op0 = ix86_compare_op1;
9248 ix86_compare_op1 = tmp;
9249 code = swap_condition (code);
9251 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9252 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9254 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9255 avoid two branches. This costs one extra insn, so disable when
9256 optimizing for size. */
9258 if ((code == EQ || code == NE)
9260 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9265 if (hi[1] != const0_rtx)
9266 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9267 NULL_RTX, 0, OPTAB_WIDEN)ELIDED
9270 if (lo[1] != const0_rtx)
9271 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9272 NULL_RTX, 0, OPTAB_WIDEN);
9274 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9275 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse with the OR of the XORs compared against zero.  */
9277 ix86_compare_op0 = tmp;
9278 ix86_compare_op1 = const0_rtx;
9279 ix86_expand_branch (code, label);
9283 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9284 op1 is a constant and the low word is zero, then we can just
9285 examine the high word. */
9287 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9290 case LT: case LTU: case GE: case GEU:
9291 ix86_compare_op0 = hi[0];
9292 ix86_compare_op1 = hi[1];
9293 ix86_expand_branch (code, label);
9299 /* Otherwise, we need two or three jumps. */
9301 label2 = gen_label_rtx ();
9304 code2 = swap_condition (code);
9305 code3 = unsigned_condition (code);
9309 case LT: case GT: case LTU: case GTU:
9312 case LE: code1 = LT; code2 = GT; break;
9313 case GE: code1 = GT; code2 = LT; break;
9314 case LEU: code1 = LTU; code2 = GTU; break;
9315 case GEU: code1 = GTU; code2 = LTU; break;
9317 case EQ: code1 = NIL; code2 = NE; break;
9318 case NE: code2 = NIL; break;
9326 * if (hi(a) < hi(b)) goto true;
9327 * if (hi(a) > hi(b)) goto false;
9328 * if (lo(a) < lo(b)) goto true;
/* Branch on the high words first, then fall through to the low words.  */
9332 ix86_compare_op0 = hi[0];
9333 ix86_compare_op1 = hi[1];
9336 ix86_expand_branch (code1, label);
9338 ix86_expand_branch (code2, label2);
9340 ix86_compare_op0 = lo[0];
9341 ix86_compare_op1 = lo[1];
9342 ix86_expand_branch (code3, label);
9345 emit_label (label2);
9354 /* Split branch based on floating point condition. */
/* Emits up to three jump insns (bypass test, main condition, second test)
   and attaches REG_BR_PROB notes when branch probabilities are known.
   (Elided listing: numeric prefixes are original line numbers; some
   intermediate lines are not shown.)  */
9356 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9357 rtx target1, rtx target2, rtx tmp)
9360 rtx label = NULL_RTX;
9362 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the fallthrough target is target2 (pc_rtx).  */
9365 if (target2 != pc_rtx)
9368 code = reverse_condition_maybe_unordered (code);
9373 condition = ix86_expand_fp_compare (code, op1, op2,
9374 tmp, &second, &bypass);
9376 if (split_branch_probability >= 0)
9378 /* Distribute the probabilities across the jumps.
9379 Assume the BYPASS and SECOND to be always test
9381 probability = split_branch_probability;
9383 /* Value of 1 is low enough to make no need for probability
9384 to be updated. Later we may run some experiments and see
9385 if unordered values are more frequent in practice. */
9387 bypass_probability = 1;
9389 second_probability = 1;
/* Bypass jump: skip the main branch when the bypass condition holds.  */
9391 if (bypass != NULL_RTX)
9393 label = gen_label_rtx ();
9394 i = emit_jump_insn (gen_rtx_SET
9396 gen_rtx_IF_THEN_ELSE (VOIDmode,
9398 gen_rtx_LABEL_REF (VOIDmode,
9401 if (bypass_probability >= 0)
9403 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9404 GEN_INT (bypass_probability),
/* Main conditional jump.  */
9407 i = emit_jump_insn (gen_rtx_SET
9409 gen_rtx_IF_THEN_ELSE (VOIDmode,
9410 condition, target1, target2)));
9411 if (probability >= 0)
9413 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9414 GEN_INT (probability),
/* Optional second jump for comparisons needing two tests.  */
9416 if (second != NULL_RTX)
9418 i = emit_jump_insn (gen_rtx_SET
9420 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9422 if (second_probability >= 0)
9424 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9425 GEN_INT (second_probability),
/* Land the bypass here, past all the conditional jumps.  */
9428 if (label != NULL_RTX)
/* Expand a setcc of comparison CODE into DEST (a QImode register).
   Returns 0 on FAIL (caller falls back), 1 on DONE.  When the FP
   comparison needs bypass/second tests, the partial results are combined
   with and/or into DEST.  (Elided listing: numeric prefixes are original
   line numbers; some guards and assignments are not shown.)  */
9433 ix86_expand_setcc (enum rtx_code code, rtx dest)
9435 rtx ret, tmp, tmpreg, equiv;
9436 rtx second_test, bypass_test;
/* DImode compares may need a multi-branch sequence; refuse those here.  */
9438 if (GET_MODE (ix86_compare_op0) == DImode
9440 return 0; /* FAIL */
9442 if (GET_MODE (dest) != QImode)
9445 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9446 PUT_MODE (ret, QImode);
9451 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* Combine the extra test's result into the primary setcc output.  */
9452 if (bypass_test || second_test)
9454 rtx test = second_test;
9456 rtx tmp2 = gen_reg_rtx (QImode);
9463 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9465 PUT_MODE (test, QImode);
9466 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
/* bypass -> AND (both must hold); second -> OR (either may hold).  */
9469 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9471 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9474 /* Attach a REG_EQUAL note describing the comparison result. */
9475 equiv = simplify_gen_relational (code, QImode,
9476 GET_MODE (ix86_compare_op0),
9477 ix86_compare_op0, ix86_compare_op1);
9478 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9480 return 1; /* DONE */
9483 /* Expand comparison setting or clearing carry flag. Return true when
9484 successful and set pop for the operation. */
/* (Elided listing: numeric prefixes are original line numbers; several
   return statements and braces are not shown.)  */
9486 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9488 enum machine_mode mode =
9489 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9491 /* Do not handle DImode compares that go through the special path. Also we can't
9492 deal with FP compares yet. This is possible to add. */
9493 if ((mode == DImode && !TARGET_64BIT))
9495 if (FLOAT_MODE_P (mode))
9497 rtx second_test = NULL, bypass_test = NULL;
9498 rtx compare_op, compare_seq;
9500 /* Shortcut: following common codes never translate into carry flag compares. */
9501 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9502 || code == ORDERED || code == UNORDERED)
9505 /* These comparisons require zero flag; swap operands so they won't. */
9506 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9512 code = swap_condition (code);
9515 /* Try to expand the comparison and verify that we end up with carry flag
9516 based comparison. This fails to be true only when we decide to expand the
9517 comparison using arithmetic, which is not a common scenario. */
9519 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9520 &second_test, &bypass_test);
9521 compare_seq = get_insns ();
/* Extra tests mean the result is not a plain carry-flag compare.  */
9524 if (second_test || bypass_test)
9526 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9527 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9528 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9530 code = GET_CODE (compare_op);
/* Only LTU/GEU read the carry flag directly.  */
9531 if (code != LTU && code != GEU)
9533 emit_insn (compare_seq);
9537 if (!INTEGRAL_MODE_P (mode))
/* Integer path: rewrite CODE/op1 so the test becomes LTU or GEU.  */
9545 /* Convert a==0 into (unsigned)a<1. */
9548 if (op1 != const0_rtx)
9551 code = (code == EQ ? LTU : GEU);
9554 /* Convert a>b into b<a or a>=b-1. */
9557 if (GET_CODE (op1) == CONST_INT)
9559 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9560 /* Bail out on overflow. We still can swap operands but that
9561 would force loading of the constant into register. */
9562 if (op1 == const0_rtx
9563 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9565 code = (code == GTU ? GEU : LTU);
9572 code = (code == GTU ? LTU : GEU);
9576 /* Convert a>=0 into (unsigned)a<0x80000000. */
9579 if (mode == DImode || op1 != const0_rtx)
9581 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9582 code = (code == LT ? GEU : LTU);
9586 if (mode == DImode || op1 != constm1_rtx)
9588 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9589 code = (code == LE ? GEU : LTU);
9595 /* Swapping operands may cause constant to appear as first operand. */
9596 if (!nonimmediate_operand (op0, VOIDmode))
9600 op0 = force_reg (mode, op0);
9602 ix86_compare_op0 = op0;
9603 ix86_compare_op1 = op1;
/* Hand the rewritten comparison to the generic expander and verify it
   produced a carry-flag user.  */
9604 *pop = ix86_expand_compare (code, NULL, NULL);
9605 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
/* Expand an integer conditional move: operands[0] = operands[1] (a
   comparison) ? operands[2] : operands[3].  Returns 0 on FAIL (caller uses
   generic code), 1 on DONE.  Tries constant-pair tricks (sbb, lea, and/add
   masking) before falling back to a real cmov.  (Elided listing: the
   numeric prefixes are original line numbers; many braces, else-arms and
   assignments are not shown -- comments describe visible code only.)  */
9611 ix86_expand_int_movcc (rtx operands[])
9613 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9614 rtx compare_seq, compare_op;
9615 rtx second_test, bypass_test;
9616 enum machine_mode mode = GET_MODE (operands[0]);
9617 bool sign_bit_compare_p = false;;
9620 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9621 compare_seq = get_insns ();
9624 compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 (or the constm1 equivalents) can be done with a sign shift.  */
9626 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9627 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9628 sign_bit_compare_p = true;
9630 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9631 HImode insns, we'd be swallowed in word prefix ops. */
9633 if ((mode != HImode || TARGET_FAST_PREFIX)
9634 && (mode != DImode || TARGET_64BIT)
9635 && GET_CODE (operands[2]) == CONST_INT
9636 && GET_CODE (operands[3]) == CONST_INT)
9638 rtx out = operands[0];
9639 HOST_WIDE_INT ct = INTVAL (operands[2]);
9640 HOST_WIDE_INT cf = INTVAL (operands[3]);
9644 /* Sign bit compares are better done using shifts than we do by using
9646 if (sign_bit_compare_p
9647 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9648 ix86_compare_op1, &compare_op))
9650 /* Detect overlap between destination and compare sources. */
9653 if (!sign_bit_compare_p)
9657 compare_code = GET_CODE (compare_op);
9659 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9660 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9663 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9666 /* To simplify rest of code, restrict to the GEU case. */
9667 if (compare_code == LTU)
9669 HOST_WIDE_INT tmp = ct;
9672 compare_code = reverse_condition (compare_code);
9673 code = reverse_condition (code);
9678 PUT_CODE (compare_op,
9679 reverse_condition_maybe_unordered
9680 (GET_CODE (compare_op)));
9682 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9686 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9687 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9688 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 mask from the carry flag (sbb idiom).  */
9691 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9693 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9697 if (code == GT || code == GE)
9698 code = reverse_condition (code);
9701 HOST_WIDE_INT tmp = ct;
9706 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9707 ix86_compare_op1, VOIDmode, 0, -1);
/* Fold the 0/-1 mask into the final value with add/or/not as the
   constant pair ct/cf allows.  */
9720 tmp = expand_simple_binop (mode, PLUS,
9722 copy_rtx (tmp), 1, OPTAB_DIRECT);
9733 tmp = expand_simple_binop (mode, IOR,
9735 copy_rtx (tmp), 1, OPTAB_DIRECT);
9737 else if (diff == -1 && ct)
9747 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9749 tmp = expand_simple_binop (mode, PLUS,
9750 copy_rtx (tmp), GEN_INT (cf),
9751 copy_rtx (tmp), 1, OPTAB_DIRECT);
9759 * andl cf - ct, dest
9769 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9772 tmp = expand_simple_binop (mode, AND,
9774 gen_int_mode (cf - ct, mode),
9775 copy_rtx (tmp), 1, OPTAB_DIRECT);
9777 tmp = expand_simple_binop (mode, PLUS,
9778 copy_rtx (tmp), GEN_INT (ct),
9779 copy_rtx (tmp), 1, OPTAB_DIRECT);
9782 if (!rtx_equal_p (tmp, out))
9783 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9785 return 1; /* DONE */
/* diff < 0: swap the constants and reverse the condition.  */
9791 tmp = ct, ct = cf, cf = tmp;
9793 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9795 /* We may be reversing unordered compare to normal compare, that
9796 is not valid in general (we may convert non-trapping condition
9797 to trapping one), however on i386 we currently emit all
9798 comparisons unordered. */
9799 compare_code = reverse_condition_maybe_unordered (compare_code);
9800 code = reverse_condition_maybe_unordered (code);
9804 compare_code = reverse_condition (compare_code);
9805 code = reverse_condition (code);
9810 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9811 && GET_CODE (ix86_compare_op1) == CONST_INT)
9813 if (ix86_compare_op1 == const0_rtx
9814 && (code == LT || code == GE))
9815 compare_code = code;
9816 else if (ix86_compare_op1 == constm1_rtx)
9820 else if (code == GT)
9825 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9826 if (compare_code != NIL
9827 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9828 && (cf == -1 || ct == -1))
9830 /* If lea code below could be used, only optimize
9831 if it results in a 2 insn sequence. */
9833 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9834 || diff == 3 || diff == 5 || diff == 9)
9835 || (compare_code == LT && ct == -1)
9836 || (compare_code == GE && cf == -1))
9839 * notl op1 (if necessary)
9847 code = reverse_condition (code);
9850 out = emit_store_flag (out, code, ix86_compare_op0,
9851 ix86_compare_op1, VOIDmode, 0, -1);
9853 out = expand_simple_binop (mode, IOR,
9855 out, 1, OPTAB_DIRECT);
9856 if (out != operands[0])
9857 emit_move_insn (operands[0], out);
9859 return 1; /* DONE */
/* lea-encodable diffs: setcc then scale/offset with lea arithmetic.  */
9864 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9865 || diff == 3 || diff == 5 || diff == 9)
9866 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9867 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9873 * lea cf(dest*(ct-cf)),dest
9877 * This also catches the degenerate setcc-only case.
9883 out = emit_store_flag (out, code, ix86_compare_op0,
9884 ix86_compare_op1, VOIDmode, 0, 1);
9887 /* On x86_64 the lea instruction operates on Pmode, so we need
9888 to get arithmetics done in proper mode to match. */
9890 tmp = copy_rtx (out);
9894 out1 = copy_rtx (out);
9895 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9899 tmp = gen_rtx_PLUS (mode, tmp, out1);
9905 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9908 if (!rtx_equal_p (tmp, out))
9911 out = force_operand (tmp, copy_rtx (out));
9913 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9915 if (!rtx_equal_p (out, operands[0]))
9916 emit_move_insn (operands[0], copy_rtx (out));
9918 return 1; /* DONE */
9922 * General case: Jumpful:
9923 * xorl dest,dest cmpl op1, op2
9924 * cmpl op1, op2 movl ct, dest
9926 * decl dest movl cf, dest
9927 * andl (cf-ct),dest 1:
9932 * This is reasonably steep, but branch mispredict costs are
9933 * high on modern cpus, so consider failing only if optimizing
9937 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9938 && BRANCH_COST >= 2)
9944 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9945 /* We may be reversing unordered compare to normal compare,
9946 that is not valid in general (we may convert non-trapping
9947 condition to trapping one), however on i386 we currently
9948 emit all comparisons unordered. */
9949 code = reverse_condition_maybe_unordered (code);
9952 code = reverse_condition (code);
9953 if (compare_code != NIL)
9954 compare_code = reverse_condition (compare_code);
9958 if (compare_code != NIL)
9960 /* notl op1 (if needed)
9965 For x < 0 (resp. x <= -1) there will be no notl,
9966 so if possible swap the constants to get rid of the
9968 True/false will be -1/0 while code below (store flag
9969 followed by decrement) is 0/-1, so the constants need
9970 to be exchanged once more. */
9972 if (compare_code == GE || !cf)
9974 code = reverse_condition (code);
9979 HOST_WIDE_INT tmp = cf;
9984 out = emit_store_flag (out, code, ix86_compare_op0,
9985 ix86_compare_op1, VOIDmode, 0, -1);
9989 out = emit_store_flag (out, code, ix86_compare_op0,
9990 ix86_compare_op1, VOIDmode, 0, 1);
9992 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9993 copy_rtx (out), 1, OPTAB_DIRECT);
/* Mask-and-offset: out = ((flag) & (cf-ct)) + ct.  */
9996 out = expand_simple_binop (mode, AND, copy_rtx (out),
9997 gen_int_mode (cf - ct, mode),
9998 copy_rtx (out), 1, OPTAB_DIRECT);
10000 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10001 copy_rtx (out), 1, OPTAB_DIRECT);
10002 if (!rtx_equal_p (out, operands[0]))
10003 emit_move_insn (operands[0], copy_rtx (out));
10005 return 1; /* DONE */
10009 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10011 /* Try a few things more with specific constants and a variable. */
10014 rtx var, orig_out, out, tmp;
10016 if (BRANCH_COST <= 2)
10017 return 0; /* FAIL */
10019 /* If one of the two operands is an interesting constant, load a
10020 constant with the above and mask it in with a logical operation. */
10022 if (GET_CODE (operands[2]) == CONST_INT)
10025 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10026 operands[3] = constm1_rtx, op = and_optab;
10027 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10028 operands[3] = const0_rtx, op = ior_optab;
10030 return 0; /* FAIL */
10032 else if (GET_CODE (operands[3]) == CONST_INT)
10035 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10036 operands[2] = constm1_rtx, op = and_optab;
10037 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10038 operands[2] = const0_rtx, op = ior_optab;
10040 return 0; /* FAIL */
10043 return 0; /* FAIL */
10045 orig_out = operands[0];
10046 tmp = gen_reg_rtx (mode);
10049 /* Recurse to get the constant loaded. */
10050 if (ix86_expand_int_movcc (operands) == 0)
10051 return 0; /* FAIL */
10053 /* Mask in the interesting variable. */
10054 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10056 if (!rtx_equal_p (out, orig_out))
10057 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10059 return 1; /* DONE */
10063 * For comparison with above,
/* Real cmov path: force operands to registers and emit IF_THEN_ELSE.  */
10073 if (! nonimmediate_operand (operands[2], mode))
10074 operands[2] = force_reg (mode, operands[2]);
10075 if (! nonimmediate_operand (operands[3], mode))
10076 operands[3] = force_reg (mode, operands[3]);
10078 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10080 rtx tmp = gen_reg_rtx (mode);
10081 emit_move_insn (tmp, operands[3]);
10084 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10086 rtx tmp = gen_reg_rtx (mode);
10087 emit_move_insn (tmp, operands[2]);
10091 if (! register_operand (operands[2], VOIDmode)
10093 || ! register_operand (operands[3], VOIDmode)))
10094 operands[2] = force_reg (mode, operands[2]);
10097 && ! register_operand (operands[3], VOIDmode))
10098 operands[3] = force_reg (mode, operands[3]);
10100 emit_insn (compare_seq);
10101 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10102 gen_rtx_IF_THEN_ELSE (mode,
10103 compare_op, operands[2],
/* Follow-up cmovs merge in the bypass/second test results.  */
10106 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10107 gen_rtx_IF_THEN_ELSE (mode,
10109 copy_rtx (operands[3]),
10110 copy_rtx (operands[0]))));
10112 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10113 gen_rtx_IF_THEN_ELSE (mode,
10115 copy_rtx (operands[2]),
10116 copy_rtx (operands[0]))));
10118 return 1; /* DONE */
/* Expand a floating point conditional move of operands[].  Tries SSE
   min/max instructions and SSE conditional moves before falling back to
   x87 fcmov.  (Elided listing: numeric prefixes are original line numbers;
   several lines are not shown.)  */
10122 ix86_expand_fp_movcc (rtx operands[])
10124 enum rtx_code code;
10126 rtx compare_op, second_test, bypass_test;
10128 /* For SF/DFmode conditional moves based on comparisons
10129 in same mode, we may want to use SSE min/max instructions. */
10130 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10131 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10132 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10133 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10134 && (!TARGET_IEEE_FP
10135 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10136 /* We may be called from the post-reload splitter. */
10137 && (!REG_P (operands[0])
10138 || SSE_REG_P (operands[0])
10139 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10141 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10142 code = GET_CODE (operands[1]);
10144 /* See if we have (cross) match between comparison operands and
10145 conditional move operands. */
10146 if (rtx_equal_p (operands[2], op1))
10151 code = reverse_condition_maybe_unordered (code);
10153 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10155 /* Check for min operation. */
10156 if (code == LT || code == UNLE)
10164 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10165 if (memory_operand (op0, VOIDmode))
10166 op0 = force_reg (GET_MODE (operands[0]), op0);
10167 if (GET_MODE (operands[0]) == SFmode)
10168 emit_insn (gen_minsf3 (operands[0], op0, op1));
10170 emit_insn (gen_mindf3 (operands[0], op0, op1));
10173 /* Check for max operation. */
10174 if (code == GT || code == UNGE)
10182 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10183 if (memory_operand (op0, VOIDmode))
10184 op0 = force_reg (GET_MODE (operands[0]), op0);
10185 if (GET_MODE (operands[0]) == SFmode)
10186 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10188 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10192 /* Manage condition to be sse_comparison_operator. In case we are
10193 in non-ieee mode, try to canonicalize the destination operand
10194 to be first in the comparison - this helps reload to avoid extra
10196 if (!sse_comparison_operator (operands[1], VOIDmode)
10197 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10199 rtx tmp = ix86_compare_op0;
10200 ix86_compare_op0 = ix86_compare_op1;
10201 ix86_compare_op1 = tmp;
10202 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10203 VOIDmode, ix86_compare_op0,
10206 /* Similarly try to manage result to be first operand of conditional
10207 move. We also don't support the NE comparison on SSE, so try to
10209 if ((rtx_equal_p (operands[0], operands[3])
10210 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10211 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10213 rtx tmp = operands[2];
10214 operands[2] = operands[3];
10216 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10217 (GET_CODE (operands[1])),
10218 VOIDmode, ix86_compare_op0,
10221 if (GET_MODE (operands[0]) == SFmode)
10222 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10223 operands[2], operands[3],
10224 ix86_compare_op0, ix86_compare_op1));
10226 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10227 operands[2], operands[3],
10228 ix86_compare_op0, ix86_compare_op1));
10232 /* The floating point conditional move instructions don't directly
10233 support conditions resulting from a signed integer comparison. */
10235 code = GET_CODE (operands[1]);
10236 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10238 /* The floating point conditional move instructions don't directly
10239 support signed integer comparisons. */
/* Unsupported condition: compute it with setcc, then test that result.  */
10241 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10243 if (second_test != NULL || bypass_test != NULL)
10245 tmp = gen_reg_rtx (QImode);
10246 ix86_expand_setcc (code, tmp);
10248 ix86_compare_op0 = tmp;
10249 ix86_compare_op1 = const0_rtx;
10250 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Avoid clobbering sources still needed by the extra tests.  */
10252 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10254 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10255 emit_move_insn (tmp, operands[3]);
10258 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10260 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10261 emit_move_insn (tmp, operands[2]);
10265 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10266 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10271 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10272 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10277 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10278 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10286 /* Expand conditional increment or decrement using adc/sbb instructions.
10287 The default case using setcc followed by the conditional move can be
10288 done by generic code. */
/* Returns 0 on FAIL, 1 on DONE.  operands[3] must be +1 or -1; the carry
   flag from ix86_expand_carry_flag_compare drives an adc or sbb.
   (Elided listing: numeric prefixes are original line numbers.)  */
10290 ix86_expand_int_addcc (rtx operands[])
10292 enum rtx_code code = GET_CODE (operands[1]);
10294 rtx val = const0_rtx;
10295 bool fpcmp = false;
10296 enum machine_mode mode = GET_MODE (operands[0]);
/* Only increment/decrement by one is handled here.  */
10298 if (operands[3] != const1_rtx
10299 && operands[3] != constm1_rtx)
10301 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10302 ix86_compare_op1, &compare_op))
10304 code = GET_CODE (compare_op);
10306 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10307 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10310 code = ix86_fp_compare_code_to_integer (code);
10317 PUT_CODE (compare_op,
10318 reverse_condition_maybe_unordered
10319 (GET_CODE (compare_op)));
10321 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10323 PUT_MODE (compare_op, mode);
10325 /* Construct either adc or sbb insn. */
10326 if ((code == LTU) == (operands[3] == constm1_rtx))
10328 switch (GET_MODE (operands[0]))
10331 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10334 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10337 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10340 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10348 switch (GET_MODE (operands[0]))
10351 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10354 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10357 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10360 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10366 return 1; /* DONE */
10370 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10371 works for floating point parameters and nonoffsetable memories.
10372 For pushes, it returns just stack offsets; the values will be saved
10373 in the right order. Maximally three parts are generated. */
/* Returns the number of parts (the visible code computes 2 or 3 on
   32-bit targets).  (Elided listing: numeric prefixes are original line
   numbers; some braces/returns are not shown.)  */
10376 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10381 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10383 size = (GET_MODE_SIZE (mode) + 4) / 8;
/* MMX registers cannot be split into parts.  */
10385 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10387 if (size < 2 || size > 3)
10390 /* Optimize constant pool reference to immediates. This is used by fp
10391 moves, that force all constants to memory to allow combining. */
10392 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10394 rtx tmp = maybe_get_pool_constant (operand);
10399 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10401 /* The only non-offsetable memories we handle are pushes. */
10402 if (! push_operand (operand, VOIDmode))
10405 operand = copy_rtx (operand);
10406 PUT_MODE (operand, Pmode);
10407 parts[0] = parts[1] = parts[2] = operand;
10409 else if (!TARGET_64BIT)
10411 if (mode == DImode)
10412 split_di (&operand, 1, &parts[0], &parts[1]);
10415 if (REG_P (operand))
/* Consecutive-register split is only valid before reload.  */
10417 if (!reload_completed)
10419 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10420 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10422 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10424 else if (offsettable_memref_p (operand))
10426 operand = adjust_address (operand, SImode, 0);
10427 parts[0] = operand;
10428 parts[1] = adjust_address (operand, SImode, 4);
10430 parts[2] = adjust_address (operand, SImode, 8);
10432 else if (GET_CODE (operand) == CONST_DOUBLE)
10437 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* FP constant: convert to target words and emit as SImode immediates.  */
10441 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10442 parts[2] = gen_int_mode (l[2], SImode);
10445 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10450 parts[1] = gen_int_mode (l[1], SImode);
10451 parts[0] = gen_int_mode (l[0], SImode);
/* 64-bit target: split into DImode (and SImode/DImode upper) parts.  */
10459 if (mode == TImode)
10460 split_ti (&operand, 1, &parts[0], &parts[1]);
10461 if (mode == XFmode || mode == TFmode)
10463 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10464 if (REG_P (operand))
10466 if (!reload_completed)
10468 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10469 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10471 else if (offsettable_memref_p (operand))
10473 operand = adjust_address (operand, DImode, 0);
10474 parts[0] = operand;
10475 parts[1] = adjust_address (operand, upper_mode, 8);
10477 else if (GET_CODE (operand) == CONST_DOUBLE)
10482 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10483 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10484 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10485 if (HOST_BITS_PER_WIDE_INT >= 64)
10488 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10489 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10492 parts[0] = immed_double_const (l[0], l[1], DImode);
10493 if (upper_mode == SImode)
10494 parts[1] = gen_int_mode (l[2], SImode);
10495 else if (HOST_BITS_PER_WIDE_INT >= 64)
10498 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10499 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10502 parts[1] = immed_double_const (l[2], l[3], DImode);
10512 /* Emit insns to perform a move or push of DI, DF, and XF values.
10513 Return false when normal moves are needed; true when all required
10514 insns have been emitted. Operands 2-4 contain the input values
10515 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): this excerpt is elided — declarations of `part', `nparts',
   `push', `base' and several braces/returns are not visible here.  */
10518 ix86_split_long_move (rtx operands[])
10523 int collisions = 0;
10524 enum machine_mode mode = GET_MODE (operands[0]);
10526 /* The DFmode expanders may ask us to move double.
10527 For 64bit target this is single move. By hiding the fact
10528 here we simplify i386.md splitters. */
10529 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10531 /* Optimize constant pool reference to immediates. This is used by
10532 fp moves, that force all constants to memory to allow combining. */
10534 if (GET_CODE (operands[1]) == MEM
10535 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10536 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10537 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10538 if (push_operand (operands[0], VOIDmode))
10540 operands[0] = copy_rtx (operands[0]);
10541 PUT_MODE (operands[0], Pmode);
/* 64-bit: an 8-byte value is a single DImode move; done.  */
10544 operands[0] = gen_lowpart (DImode, operands[0]);
10545 operands[1] = gen_lowpart (DImode, operands[1]);
10546 emit_move_insn (operands[0], operands[1]);
10550 /* The only non-offsettable memory we handle is push. */
10551 if (push_operand (operands[0], VOIDmode))
10553 else if (GET_CODE (operands[0]) == MEM
10554 && ! offsettable_memref_p (operands[0]))
/* Split both operands into word-sized parts; nparts is 2 or 3.  */
10557 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10558 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10560 /* When emitting push, take care for source operands on the stack. */
10561 if (push && GET_CODE (operands[1]) == MEM
10562 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10565 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10566 XEXP (part[1][2], 0));
10567 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10568 XEXP (part[1][1], 0));
10571 /* We need to do copy in the right order in case an address register
10572 of the source overlaps the destination. */
10573 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
/* Count how many destination registers are used in the source address.  */
10575 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10577 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10580 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10583 /* Collision in the middle part can be handled by reordering. */
10584 if (collisions == 1 && nparts == 3
10585 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10588 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10589 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10592 /* If there are more collisions, we can't handle it by reordering.
10593 Do an lea to the last part and use only one colliding move. */
10594 else if (collisions > 1)
10600 base = part[0][nparts - 1];
10602 /* Handle the case when the last part isn't valid for lea.
10603 Happens in 64-bit mode storing the 12-byte XFmode. */
10604 if (GET_MODE (base) != Pmode)
10605 base = gen_rtx_REG (Pmode, REGNO (base));
/* base = address of source; rewrite all source parts relative to it.  */
10607 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10608 part[1][0] = replace_equiv_address (part[1][0], base);
10609 part[1][1] = replace_equiv_address (part[1][1],
10610 plus_constant (base, UNITS_PER_WORD));
10612 part[1][2] = replace_equiv_address (part[1][2],
10613 plus_constant (base, 8));
/* Push case: emit parts high-to-low so the stack ends up in order.  */
10623 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10624 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10625 emit_move_insn (part[0][2], part[1][2]);
10630 /* In 64bit mode we don't have 32bit push available. In case this is
10631 register, it is OK - we will just use larger counterpart. We also
10632 retype memory - these comes from attempt to avoid REX prefix on
10633 moving of second half of TFmode value. */
10634 if (GET_MODE (part[1][1]) == SImode)
10636 if (GET_CODE (part[1][1]) == MEM)
10637 part[1][1] = adjust_address (part[1][1], DImode, 0);
10638 else if (REG_P (part[1][1]))
10639 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10642 if (GET_MODE (part[1][0]) == SImode)
10643 part[1][0] = part[1][1];
10646 emit_move_insn (part[0][1], part[1][1]);
10647 emit_move_insn (part[0][0], part[1][0]);
10651 /* Choose correct order to not overwrite the source before it is copied. */
10652 if ((REG_P (part[0][0])
10653 && REG_P (part[1][1])
10654 && (REGNO (part[0][0]) == REGNO (part[1][1])
10656 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10658 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Overlap detected: schedule the moves high part first.  */
10662 operands[2] = part[0][2];
10663 operands[3] = part[0][1];
10664 operands[4] = part[0][0];
10665 operands[5] = part[1][2];
10666 operands[6] = part[1][1];
10667 operands[7] = part[1][0];
10671 operands[2] = part[0][1];
10672 operands[3] = part[0][0];
10673 operands[5] = part[1][1];
10674 operands[6] = part[1][0];
/* No overlap: low-to-high order.  */
10681 operands[2] = part[0][0];
10682 operands[3] = part[0][1];
10683 operands[4] = part[0][2];
10684 operands[5] = part[1][0];
10685 operands[6] = part[1][1];
10686 operands[7] = part[1][2];
10690 operands[2] = part[0][0];
10691 operands[3] = part[0][1];
10692 operands[5] = part[1][0];
10693 operands[6] = part[1][1];
/* Finally emit the scheduled part moves (third one only if nparts == 3).  */
10696 emit_move_insn (operands[2], operands[5]);
10697 emit_move_insn (operands[3], operands[6]);
10699 emit_move_insn (operands[4], operands[7]);
/* Split a DImode left shift into SImode operations.
   OPERANDS: 0 = dest, 1 = src, 2 = shift count; SCRATCH is an optional
   SImode scratch register (required when no_new_pseudos).  */
10705 ix86_split_ashldi (rtx *operands, rtx scratch)
10707 rtx low[2], high[2];
10710 if (GET_CODE (operands[2]) == CONST_INT)
10712 split_di (operands, 2, low, high);
10713 count = INTVAL (operands[2]) & 63;
/* count >= 32: low word shifts entirely into the high word.  */
10717 emit_move_insn (high[0], low[1]);
10718 emit_move_insn (low[0], const0_rtx);
10721 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* count < 32: use SHLD to carry low bits into the high word.  */
10725 if (!rtx_equal_p (operands[0], operands[1]))
10726 emit_move_insn (operands[0], operands[1]);
10727 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10728 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count: emit shld + shl, then fix up for counts >= 32.  */
10733 if (!rtx_equal_p (operands[0], operands[1]))
10734 emit_move_insn (operands[0], operands[1]);
10736 split_di (operands, 1, low, high);
10738 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10739 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
/* With cmov the adjustment is branch-free; otherwise use the
   branching x86_shift_adj_2 pattern.  */
10741 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10743 if (! no_new_pseudos)
10744 scratch = force_reg (SImode, const0_rtx);
10746 emit_move_insn (scratch, const0_rtx);
10748 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10752 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic right shift into SImode operations.
   OPERANDS: 0 = dest, 1 = src, 2 = shift count; SCRATCH is an optional
   SImode scratch register.  */
10757 ix86_split_ashrdi (rtx *operands, rtx scratch)
10759 rtx low[2], high[2];
10762 if (GET_CODE (operands[2]) == CONST_INT)
10764 split_di (operands, 2, low, high);
10765 count = INTVAL (operands[2]) & 63;
/* count >= 32: high word shifts into the low word; high word becomes
   the sign (arithmetic shift by 31).  */
10769 emit_move_insn (low[0], high[1]);
10771 if (! reload_completed)
10772 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10775 emit_move_insn (high[0], low[0]);
10776 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10780 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* count < 32: SHRD carries high bits into the low word.  */
10784 if (!rtx_equal_p (operands[0], operands[1]))
10785 emit_move_insn (operands[0], operands[1]);
10786 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10787 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then fix up for counts >= 32.  */
10792 if (!rtx_equal_p (operands[0], operands[1]))
10793 emit_move_insn (operands[0], operands[1]);
10795 split_di (operands, 1, low, high);
10797 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10798 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
/* Scratch holds the sign word for the branch-free cmov adjustment.  */
10800 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10802 if (! no_new_pseudos)
10803 scratch = gen_reg_rtx (SImode);
10804 emit_move_insn (scratch, high[0]);
10805 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10806 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10810 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a DImode logical right shift into SImode operations.
   OPERANDS: 0 = dest, 1 = src, 2 = shift count; SCRATCH is an optional
   SImode scratch register.  */
10815 ix86_split_lshrdi (rtx *operands, rtx scratch)
10817 rtx low[2], high[2];
10820 if (GET_CODE (operands[2]) == CONST_INT)
10822 split_di (operands, 2, low, high);
10823 count = INTVAL (operands[2]) & 63;
/* count >= 32: high word shifts into the low word; high word is zeroed.  */
10827 emit_move_insn (low[0], high[1]);
10828 emit_move_insn (high[0], const0_rtx);
10831 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* count < 32: SHRD carries high bits into the low word.  */
10835 if (!rtx_equal_p (operands[0], operands[1]))
10836 emit_move_insn (operands[0], operands[1]);
10837 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10838 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + shr, then fix up for counts >= 32.  */
10843 if (!rtx_equal_p (operands[0], operands[1]))
10844 emit_move_insn (operands[0], operands[1]);
10846 split_di (operands, 1, low, high);
10848 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10849 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10851 /* Heh. By reversing the arguments, we can reuse this pattern. */
10852 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10854 if (! no_new_pseudos)
10855 scratch = force_reg (SImode, const0_rtx);
10857 emit_move_insn (scratch, const0_rtx)
10859 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10863 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10867 /* Helper function for the string operations below. Test VARIABLE whether
10868 it is aligned to VALUE bytes. If so, jump to the label. */
/* NOTE(review): the elided tail presumably returns the generated label;
   callers emit the unaligned fixup, then emit_label the returned label
   so the fixup is skipped when (VARIABLE & VALUE) == 0.  */
10870 ix86_expand_aligntest (rtx variable, int value)
10872 rtx label = gen_label_rtx ();
10873 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10874 if (GET_MODE (variable) == DImode)
10875 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10877 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10878 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10883 /* Adjust COUNTER by the VALUE, i.e. decrement COUNTREG by VALUE
   (the negated constant is added in the counter's own mode). */
10885 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10887 if (GET_MODE (countreg) == DImode)
10888 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10890 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10893 /* Zero extend possibly SImode EXP to Pmode register.
   VOIDmode constants are simply forced into a Pmode register;
   values already in Pmode are copied; otherwise an SImode value
   is zero-extended into a fresh DImode (Pmode) register. */
10895 ix86_zero_extend_to_Pmode (rtx exp)
10898 if (GET_MODE (exp) == VOIDmode)
10899 return force_reg (Pmode, exp);
10900 if (GET_MODE (exp) == Pmode)
10901 return copy_to_mode_reg (Pmode, exp);
10902 r = gen_reg_rtx (Pmode);
10903 emit_insn (gen_zero_extendsidi2 (r, exp));
10907 /* Expand string move (memcpy) operation. Use i386 string operations when
10908 profitable. expand_clrstr contains similar code.
   DST/SRC are MEMs; COUNT_EXP is the byte count; ALIGN_EXP the known
   alignment.  Returns nonzero when the operation was expanded inline
   (elided in this excerpt — several returns/braces are not visible).  */
10910 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10912 rtx srcreg, destreg, countreg;
10913 enum machine_mode counter_mode;
10914 HOST_WIDE_INT align = 0;
10915 unsigned HOST_WIDE_INT count = 0;
10918 if (GET_CODE (align_exp) == CONST_INT)
10919 align = INTVAL (align_exp);
10921 /* Can't use any of this if the user has appropriated esi or edi. */
10922 if (global_regs[4] || global_regs[5])
10925 /* This simple hack avoids all inlining code and simplifies code below. */
10926 if (!TARGET_ALIGN_STRINGOPS)
10929 if (GET_CODE (count_exp) == CONST_INT)
10931 count = INTVAL (count_exp);
10932 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10936 /* Figure out proper mode for counter. For 32bits it is always SImode,
10937 for 64bits use SImode when possible, otherwise DImode.
10938 Set count to number of bytes copied when known at compile time. */
10939 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10940 || x86_64_zero_extended_value (count_exp))
10941 counter_mode = SImode;
10943 counter_mode = DImode;
10947 if (counter_mode != SImode && counter_mode != DImode)
10950 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10951 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared.  */
10953 emit_insn (gen_cld ());
10955 /* When optimizing for size emit simple rep ; movsb instruction for
10956 counts not divisible by 4. */
10958 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10960 countreg = ix86_zero_extend_to_Pmode (count_exp)
10962 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10963 destreg, srcreg, countreg));
10965 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10966 destreg, srcreg, countreg));
10969 /* For constant aligned (or small unaligned) copies use rep movsl
10970 followed by code copying the rest. For PentiumPro ensure 8 byte
10971 alignment to allow rep movsl acceleration. */
10973 else if (count != 0
10975 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10976 || optimize_size || count < (unsigned int) 64))
/* Word size of the rep move: 8 on 64-bit (rep movsq), else 4.  */
10978 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10979 if (count & ~(size - 1))
10981 countreg = copy_to_mode_reg (counter_mode,
10982 GEN_INT ((count >> (size == 4 ? 2 : 3))
10983 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10984 countreg = ix86_zero_extend_to_Pmode (countreg);
10988 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10989 destreg, srcreg, countreg));
10991 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10992 destreg, srcreg, countreg));
10995 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10996 destreg, srcreg, countreg));
/* Copy the remaining 1-7 tail bytes with single string moves.  */
10998 if (size == 8 && (count & 0x04))
10999 emit_insn (gen_strmovsi (destreg, srcreg));
11001 emit_insn (gen_strmovhi (destreg, srcreg));
11003 emit_insn (gen_strmovqi (destreg, srcreg));
11005 /* The generic code based on the glibc implementation:
11006 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11007 allowing accelerated copying there)
11008 - copy the data using rep movsl
11009 - copy the rest. */
11014 int desired_alignment = (TARGET_PENTIUMPRO
11015 && (count == 0 || count >= (unsigned int) 260)
11016 ? 8 : UNITS_PER_WORD);
11018 /* In case we don't know anything about the alignment, default to
11019 library version, since it is usually equally fast and result in
11022 Also emit call when we know that the count is large and call overhead
11023 will not be important. */
11024 if (!TARGET_INLINE_ALL_STRINGOPS
11025 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11031 if (TARGET_SINGLE_STRINGOP)
11032 emit_insn (gen_cld ());
11034 countreg2 = gen_reg_rtx (Pmode);
11035 countreg = copy_to_mode_reg (counter_mode, count_exp);
11037 /* We don't use loops to align destination and to copy parts smaller
11038 than 4 bytes, because gcc is able to optimize such code better (in
11039 the case the destination or the count really is aligned, gcc is often
11040 able to predict the branches) and also it is friendlier to the
11041 hardware branch prediction.
11043 Using loops is beneficial for generic case, because we can
11044 handle small counts using the loops. Many CPUs (such as Athlon)
11045 have large REP prefix setup costs.
11047 This is quite costly. Maybe we can revisit this decision later or
11048 add some customizability to this code. */
/* For small unknown counts, skip the alignment prologue entirely.  */
11050 if (count == 0 && align < desired_alignment)
11052 label = gen_label_rtx ();
11053 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11054 LEU, 0, counter_mode, 1, label);
/* Align destination: one byte, then one halfword, then one word.  */
11058 rtx label = ix86_expand_aligntest (destreg, 1);
11059 emit_insn (gen_strmovqi (destreg, srcreg));
11060 ix86_adjust_counter (countreg, 1);
11061 emit_label (label);
11062 LABEL_NUSES (label) = 1;
11066 rtx label = ix86_expand_aligntest (destreg, 2);
11067 emit_insn (gen_strmovhi (destreg, srcreg));
11068 ix86_adjust_counter (countreg, 2);
11069 emit_label (label);
11070 LABEL_NUSES (label) = 1;
11072 if (align <= 4 && desired_alignment > 4)
11074 rtx label = ix86_expand_aligntest (destreg, 4);
11075 emit_insn (gen_strmovsi (destreg, srcreg));
11076 ix86_adjust_counter (countreg, 4);
11077 emit_label (label);
11078 LABEL_NUSES (label) = 1;
11081 if (label && desired_alignment > 4 && !TARGET_64BIT)
11083 emit_label (label);
11084 LABEL_NUSES (label) = 1;
11087 if (!TARGET_SINGLE_STRINGOP)
11088 emit_insn (gen_cld ());
/* Bulk copy: rep movsq (64-bit) or rep movsl with count / word size.  */
11091 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11093 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11094 destreg, srcreg, countreg2));
11098 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11099 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11100 destreg, srcreg, countreg2));
11105 emit_label (label);
11106 LABEL_NUSES (label) = 1;
/* Epilogue: copy the remaining tail bytes, testing countreg when the
   count is not known at compile time.  */
11108 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11109 emit_insn (gen_strmovsi (destreg, srcreg));
11110 if ((align <= 4 || count == 0) && TARGET_64BIT)
11112 rtx label = ix86_expand_aligntest (countreg, 4);
11113 emit_insn (gen_strmovsi (destreg, srcreg));
11114 emit_label (label);
11115 LABEL_NUSES (label) = 1;
11117 if (align > 2 && count != 0 && (count & 2))
11118 emit_insn (gen_strmovhi (destreg, srcreg));
11119 if (align <= 2 || count == 0)
11121 rtx label = ix86_expand_aligntest (countreg, 2);
11122 emit_insn (gen_strmovhi (destreg, srcreg));
11123 emit_label (label);
11124 LABEL_NUSES (label) = 1;
11126 if (align > 1 && count != 0 && (count & 1))
11127 emit_insn (gen_strmovqi (destreg, srcreg));
11128 if (align <= 1 || count == 0)
11130 rtx label = ix86_expand_aligntest (countreg, 1);
11131 emit_insn (gen_strmovqi (destreg, srcreg));
11132 emit_label (label);
11133 LABEL_NUSES (label) = 1;
11137 insns = get_insns ();
11140 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11145 /* Expand string clear operation (bzero). Use i386 string operations when
11146 profitable. expand_movstr contains similar code.
   SRC is the destination MEM (named `src' here), COUNT_EXP the byte
   count, ALIGN_EXP the known alignment.  Structure parallels
   ix86_expand_movstr above; this excerpt is similarly elided.  */
11148 ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
11150 rtx destreg, zeroreg, countreg;
11151 enum machine_mode counter_mode;
11152 HOST_WIDE_INT align = 0;
11153 unsigned HOST_WIDE_INT count = 0;
11155 if (GET_CODE (align_exp) == CONST_INT)
11156 align = INTVAL (align_exp);
11158 /* Can't use any of this if the user has appropriated esi. */
11159 if (global_regs[4])
11162 /* This simple hack avoids all inlining code and simplifies code below. */
11163 if (!TARGET_ALIGN_STRINGOPS)
11166 if (GET_CODE (count_exp) == CONST_INT)
11168 count = INTVAL (count_exp);
11169 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11172 /* Figure out proper mode for counter. For 32bits it is always SImode,
11173 for 64bits use SImode when possible, otherwise DImode.
11174 Set count to number of bytes copied when known at compile time. */
11175 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11176 || x86_64_zero_extended_value (count_exp))
11177 counter_mode = SImode;
11179 counter_mode = DImode;
11181 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared.  */
11183 emit_insn (gen_cld ());
11185 /* When optimizing for size emit simple rep ; movsb instruction for
11186 counts not divisible by 4. */
11188 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11190 countreg = ix86_zero_extend_to_Pmode (count_exp);
11191 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11193 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11194 destreg, countreg));
11196 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11197 destreg, countreg));
11199 else if (count != 0
11201 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11202 || optimize_size || count < (unsigned int) 64))
/* Word size of the rep store: 8 on 64-bit (rep stosq), else 4.  */
11204 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11205 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11206 if (count & ~(size - 1))
11208 countreg = copy_to_mode_reg (counter_mode,
11209 GEN_INT ((count >> (size == 4 ? 2 : 3))
11210 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11211 countreg = ix86_zero_extend_to_Pmode (countreg);
11215 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11216 destreg, countreg));
11218 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11219 destreg, countreg));
11222 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11223 destreg, countreg));
/* Store the remaining 1-7 tail bytes with single string stores.  */
11225 if (size == 8 && (count & 0x04))
11226 emit_insn (gen_strsetsi (destreg,
11227 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11229 emit_insn (gen_strsethi (destreg,
11230 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11232 emit_insn (gen_strsetqi (destreg,
11233 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11239 /* Compute desired alignment of the string operation. */
11240 int desired_alignment = (TARGET_PENTIUMPRO
11241 && (count == 0 || count >= (unsigned int) 260)
11242 ? 8 : UNITS_PER_WORD);
11244 /* In case we don't know anything about the alignment, default to
11245 library version, since it is usually equally fast and result in
11248 Also emit call when we know that the count is large and call overhead
11249 will not be important. */
11250 if (!TARGET_INLINE_ALL_STRINGOPS
11251 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11254 if (TARGET_SINGLE_STRINGOP)
11255 emit_insn (gen_cld ());
11257 countreg2 = gen_reg_rtx (Pmode);
11258 countreg = copy_to_mode_reg (counter_mode, count_exp);
11259 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
/* For small unknown counts, skip the alignment prologue entirely.  */
11261 if (count == 0 && align < desired_alignment)
11263 label = gen_label_rtx ();
11264 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11265 LEU, 0, counter_mode, 1, label);
/* Align destination: one byte, then one halfword, then one word.  */
11269 rtx label = ix86_expand_aligntest (destreg, 1);
11270 emit_insn (gen_strsetqi (destreg,
11271 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11272 ix86_adjust_counter (countreg, 1);
11273 emit_label (label);
11274 LABEL_NUSES (label) = 1;
11278 rtx label = ix86_expand_aligntest (destreg, 2);
11279 emit_insn (gen_strsethi (destreg,
11280 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11281 ix86_adjust_counter (countreg, 2);
11282 emit_label (label);
11283 LABEL_NUSES (label) = 1;
11285 if (align <= 4 && desired_alignment > 4)
11287 rtx label = ix86_expand_aligntest (destreg, 4);
11288 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11289 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11291 ix86_adjust_counter (countreg, 4);
11292 emit_label (label);
11293 LABEL_NUSES (label) = 1;
11296 if (label && desired_alignment > 4 && !TARGET_64BIT)
11298 emit_label (label);
11299 LABEL_NUSES (label) = 1;
11303 if (!TARGET_SINGLE_STRINGOP)
11304 emit_insn (gen_cld ());
/* Bulk store: rep stosq (64-bit) or rep stosl with count / word size.  */
11307 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11309 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11310 destreg, countreg2));
11314 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11315 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11316 destreg, countreg2));
11320 emit_label (label);
11321 LABEL_NUSES (label) = 1;
/* Epilogue: store the remaining tail bytes, testing countreg when the
   count is not known at compile time.  */
11324 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11325 emit_insn (gen_strsetsi (destreg,
11326 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11327 if (TARGET_64BIT && (align <= 4 || count == 0))
11329 rtx label = ix86_expand_aligntest (countreg, 4);
11330 emit_insn (gen_strsetsi (destreg,
11331 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11332 emit_label (label);
11333 LABEL_NUSES (label) = 1;
11335 if (align > 2 && count != 0 && (count & 2))
11336 emit_insn (gen_strsethi (destreg,
11337 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11338 if (align <= 2 || count == 0)
11340 rtx label = ix86_expand_aligntest (countreg, 2);
11341 emit_insn (gen_strsethi (destreg,
11342 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11343 emit_label (label);
11344 LABEL_NUSES (label) = 1;
11346 if (align > 1 && count != 0 && (count & 1))
11347 emit_insn (gen_strsetqi (destreg,
11348 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11349 if (align <= 1 || count == 0)
11351 rtx label = ix86_expand_aligntest (countreg, 1);
11352 emit_insn (gen_strsetqi (destreg,
11353 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11354 emit_label (label);
11355 LABEL_NUSES (label) = 1;
11360 /* Expand strlen.
   OUT receives the length; SRC is the string MEM; EOSCHAR the
   terminator (const0_rtx for plain strlen); ALIGN the known alignment.
   Uses the unrolled expansion when profitable, otherwise repnz scasb.  */
11362 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11364 rtx addr, scratch1, scratch2, scratch3, scratch4;
11366 /* The generic case of strlen expander is long. Avoid it's
11367 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11369 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11370 && !TARGET_INLINE_ALL_STRINGOPS
11372 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11375 addr = force_reg (Pmode, XEXP (src, 0));
11376 scratch1 = gen_reg_rtx (Pmode);
11378 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11381 /* Well it seems that some optimizer does not combine a call like
11382 foo(strlen(bar), strlen(bar));
11383 when the move and the subtraction is done here. It does calculate
11384 the length just once when these instructions are done inside of
11385 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11386 often used and I use one fewer register for the lifetime of
11387 output_strlen_unroll() this is better. */
11389 emit_move_insn (out, addr);
11391 ix86_expand_strlensi_unroll_1 (out, align);
11393 /* strlensi_unroll_1 returns the address of the zero at the end of
11394 the string, like memchr(), so compute the length by subtracting
11395 the start address. */
11397 emit_insn (gen_subdi3 (out, out, addr));
11399 emit_insn (gen_subsi3 (out, out, addr));
/* repnz scasb path: scan for EOSCHAR, then length = ~count - 1.  */
11403 scratch2 = gen_reg_rtx (Pmode);
11404 scratch3 = gen_reg_rtx (Pmode);
11405 scratch4 = force_reg (Pmode, constm1_rtx);
11407 emit_move_insn (scratch3, addr);
11408 eoschar = force_reg (QImode, eoschar);
11410 emit_insn (gen_cld ());
11413 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11414 align, scratch4, scratch3));
11415 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11416 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11420 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11421 align, scratch4, scratch3));
11422 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11423 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11429 /* Expand the appropriate insns for doing strlen if not just doing
11432 out = result, initialized with the start address
11433 align_rtx = alignment of the address.
11434 scratch = scratch register, initialized with the startaddress when
11435 not aligned, otherwise undefined
11437 This is just the body. It needs the initializations mentioned above and
11438 some address computing at the end. These things are done in i386.md. */
11441 ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
11445 rtx align_2_label = NULL_RTX;
11446 rtx align_3_label = NULL_RTX;
11447 rtx align_4_label = gen_label_rtx ();
11448 rtx end_0_label = gen_label_rtx ();
11450 rtx tmpreg = gen_reg_rtx (SImode);
11451 rtx scratch = gen_reg_rtx (SImode);
11455 if (GET_CODE (align_rtx) == CONST_INT)
11456 align = INTVAL (align_rtx);
11458 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11460 /* Is there a known alignment and is it less than 4? */
11463 rtx scratch1 = gen_reg_rtx (Pmode);
11464 emit_move_insn (scratch1, out);
11465 /* Is there a known alignment and is it not 2? */
11468 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11469 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11471 /* Leave just the 3 lower bits. */
11472 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11473 NULL_RTX, 0, OPTAB_WIDEN);
11475 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11476 Pmode, 1, align_4_label);
11477 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11478 Pmode, 1, align_2_label);
11479 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11480 Pmode, 1, align_3_label);
11484 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11485 check if is aligned to 4 - byte. */
11487 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11488 NULL_RTX, 0, OPTAB_WIDEN);
11490 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11491 Pmode, 1, align_4_label);
11494 mem = gen_rtx_MEM (QImode, out);
11496 /* Now compare the bytes. */
11498 /* Compare the first n unaligned byte on a byte per byte basis. */
11499 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11500 QImode, 1, end_0_label);
11502 /* Increment the address. */
11504 emit_insn (gen_adddi3 (out, out, const1_rtx));
11506 emit_insn (gen_addsi3 (out, out, const1_rtx));
11508 /* Not needed with an alignment of 2 */
11511 emit_label (align_2_label);
11513 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11517 emit_insn (gen_adddi3 (out, out, const1_rtx));
11519 emit_insn (gen_addsi3 (out, out, const1_rtx));
11521 emit_label (align_3_label);
11524 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11528 emit_insn (gen_adddi3 (out, out, const1_rtx));
11530 emit_insn (gen_addsi3 (out, out, const1_rtx));
11533 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11534 align this loop. It gives only huge programs, but does not help to
11536 emit_label (align_4_label);
11538 mem = gen_rtx_MEM (SImode, out);
11539 emit_move_insn (scratch, mem);
11541 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11543 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11545 /* This formula yields a nonzero result iff one of the bytes is zero.
11546 This saves three branches inside loop and many cycles. */
11548 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11549 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11550 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11551 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11552 gen_int_mode (0x80808080, SImode)));
11553 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found: locate which of the four bytes it is.
   With cmov, do it branch-free; otherwise use a conditional jump.  */
11558 rtx reg = gen_reg_rtx (SImode);
11559 rtx reg2 = gen_reg_rtx (Pmode);
11560 emit_move_insn (reg, tmpreg);
11561 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11563 /* If zero is not in the first two bytes, move two bytes forward. */
11564 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11565 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11566 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11567 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11568 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11571 /* Emit lea manually to avoid clobbering of flags. */
11572 emit_insn (gen_rtx_SET (SImode, reg2,
11573 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11575 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11576 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11577 emit_insn (gen_rtx_SET (VOIDmode, out,
11578 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11585 rtx end_2_label = gen_label_rtx ();
11586 /* Is zero in the first two bytes? */
11588 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11589 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11590 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11591 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11592 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11594 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11595 JUMP_LABEL (tmp) = end_2_label;
11597 /* Not in the first two. Move two bytes forward. */
11598 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11600 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11602 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11604 emit_label (end_2_label);
11608 /* Avoid branch in fixing the byte. */
11609 tmpreg = gen_lowpart (QImode, tmpreg);
11610 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
/* NOTE(review): reg 17 here looks like the flags register (FLAGS_REG
   is used elsewhere in this function) — confirm against the target. */
11611 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11613 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11615 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11617 emit_label (end_0_label);
/* Emit a call to FNADDR (a MEM).  RETVAL, if non-NULL, receives the
   result; CALLARG1/CALLARG2 are target-specific call arguments; POP is
   the number of bytes the callee pops; SIBCALL is nonzero for sibling
   calls.  (Excerpt is elided: some #if/brace lines are not visible.)  */
11621 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2,
11622 rtx pop, int sibcall)
11624 rtx use = NULL, call;
11626 if (pop == const0_rtx)
11628 if (TARGET_64BIT && pop)
11632 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11633 fnaddr = machopic_indirect_call_target (fnaddr);
11635 /* Static functions and indirect calls don't need the pic register. */
11636 if (! TARGET_64BIT && flag_pic
11637 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11638 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11639 use_reg (&use, pic_offset_table_rtx)
/* x86-64 varargs ABI: AL carries the number of vector registers used.  */
11641 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11643 rtx al = gen_rtx_REG (QImode, 0);
11644 emit_move_insn (al, callarg2);
11645 use_reg (&use, al);
11647 #endif /* TARGET_MACHO */
11649 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11651 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11652 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls through a register must use R11 (a call-clobbered
   register not used for argument passing).  */
11654 if (sibcall && TARGET_64BIT
11655 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11658 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11659 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11660 emit_move_insn (fnaddr, addr);
11661 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11664 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11666 call = gen_rtx_SET (VOIDmode, retval, call);
/* Represent the callee-pop stack adjustment in the call pattern.  */
11669 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11670 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11671 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11674 call = emit_call_insn (call);
11676 CALL_INSN_FUNCTION_USAGE (call) = use;
11680 /* Clear stack slot assignments remembered from previous functions.
11681 This is called from INIT_EXPANDERS once before RTL is emitted for each
11684 static struct machine_function *
11685 ix86_init_machine_status (void)
11687 struct machine_function *f;
/* ggc_alloc_cleared zero-initializes the structure; only the fields
   that must not start at zero are set explicitly below.  */
11689 f = ggc_alloc_cleared (sizeof (struct machine_function));
/* -1 marks the prologue/epilogue register count as not yet computed --
   presumably recomputed lazily elsewhere; verify against users.  */
11690 f->use_fast_prologue_epilogue_nregs = -1;
11695 /* Return a MEM corresponding to a stack slot with mode MODE.
11696 Allocate a new slot if necessary.
11698 The RTL for a function can have several slots available: N is
11699 which slot to use. */
11702 assign_386_stack_local (enum machine_mode mode, int n)
11704 struct stack_local_entry *s;
/* Reject out-of-range slot numbers (elided line is presumably abort()).  */
11706 if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Reuse an existing (mode, n) slot if one was already allocated for
   this function.  */
11709 for (s = ix86_stack_locals; s; s = s->next)
11710 if (s->mode == mode && s->n == n)
11713 s = (struct stack_local_entry *)
11714 ggc_alloc (sizeof (struct stack_local_entry));
11717 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
/* Push the new entry onto the per-function list head.  */
11719 s->next = ix86_stack_locals;
11720 ix86_stack_locals = s;
11724 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11726 static GTY(()) rtx ix86_tls_symbol;
11728 ix86_tls_get_addr (void)
/* Lazily create and cache the SYMBOL_REF; the GTY(()) root above keeps
   it alive across garbage collections.  */
11731 if (!ix86_tls_symbol)
/* 32-bit GNU TLS uses ___tls_get_addr (three underscores, register
   calling convention); everything else uses the standard name.  */
11733 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11734 (TARGET_GNU_TLS && !TARGET_64BIT)
11735 ? "___tls_get_addr"
11736 : "__tls_get_addr");
11739 return ix86_tls_symbol;
11742 /* Calculate the length of the memory address in the instruction
11743 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11746 memory_address_length (rtx addr)
11748 struct ix86_address parts;
11749 rtx base, index, disp;
/* Auto-modify addresses (push/pop style) take no extra encoding bytes
   beyond modrm -- the elided branch presumably returns 0 here.  */
11752 if (GET_CODE (addr) == PRE_DEC
11753 || GET_CODE (addr) == POST_INC
11754 || GET_CODE (addr) == PRE_MODIFY
11755 || GET_CODE (addr) == POST_MODIFY)
11758 if (! ix86_decompose_address (addr, &parts))
11762 index = parts.index;
/* Encoding quirks of the x86 modrm/sib format:  */
11767 - esp as the base always wants an index,
11768 - ebp as the base always wants a displacement. */
11770 /* Register Indirect. */
11771 if (base && !index && !disp)
11773 /* esp (for its index) and ebp (for its displacement) need
11774 the two-byte modrm form. */
11775 if (addr == stack_pointer_rtx
11776 || addr == arg_pointer_rtx
11777 || addr == frame_pointer_rtx
11778 || addr == hard_frame_pointer_rtx)
11782 /* Direct Addressing. */
11783 else if (disp && !base && !index)
11788 /* Find the length of the displacement constant. */
/* 'K' constraint is a signed 8-bit immediate: one displacement byte
   instead of four.  */
11791 if (GET_CODE (disp) == CONST_INT
11792 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11798 /* ebp always wants a displacement. */
11799 else if (base == hard_frame_pointer_rtx)
11802 /* An index requires the two-byte modrm form... */
11804 /* ...like esp, which always wants an index. */
11805 || base == stack_pointer_rtx
11806 || base == arg_pointer_rtx
11807 || base == frame_pointer_rtx)
11814 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11815 is set, expect that insn have 8bit immediate alternative. */
11817 ix86_attr_length_immediate_default (rtx insn, int shortform)
/* Scan all operands for a constant; at most one immediate is encoded.  */
11821 extract_insn_cached (insn);
11822 for (i = recog_data.n_operands - 1; i >= 0; --i)
11823 if (CONSTANT_P (recog_data.operand[i]))
/* 'K' = value fits in a signed byte, so the short (1-byte) immediate
   form applies when SHORTFORM allows it.  */
11828 && GET_CODE (recog_data.operand[i]) == CONST_INT
11829 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
/* Otherwise the immediate width follows the insn's mode attribute.  */
11833 switch (get_attr_mode (insn))
11844 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11849 fatal_insn ("unknown insn mode", insn);
11855 /* Compute default value for "length_address" attribute. */
11857 ix86_attr_length_address_default (rtx insn)
/* LEA encodes its address in the SET_SRC rather than a MEM operand,
   so handle it specially.  */
11861 if (get_attr_type (insn) == TYPE_LEA)
11863 rtx set = PATTERN (insn);
11864 if (GET_CODE (set) == SET)
11866 else if (GET_CODE (set) == PARALLEL
11867 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11868 set = XVECEXP (set, 0, 0);
11871 #ifdef ENABLE_CHECKING
11877 return memory_address_length (SET_SRC (set));
/* Otherwise measure the first MEM operand found (x86 insns have at
   most one memory operand).  */
11880 extract_insn_cached (insn);
11881 for (i = recog_data.n_operands - 1; i >= 0; --i)
11882 if (GET_CODE (recog_data.operand[i]) == MEM)
11884 return memory_address_length (XEXP (recog_data.operand[i], 0));
11890 /* Return the maximum number of instructions a cpu can issue. */
11893 ix86_issue_rate (void)
/* Switch on ix86_tune (elided); per-CPU return values are elided in
   this excerpt.  */
11897 case PROCESSOR_PENTIUM:
11901 case PROCESSOR_PENTIUMPRO:
11902 case PROCESSOR_PENTIUM4:
11903 case PROCESSOR_ATHLON:
11912 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11913 by DEP_INSN and nothing set by DEP_INSN. */
11916 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11920 /* Simplify the test for uninteresting insns. */
/* Only flag consumers (setcc, integer/fp cmov, conditional branch) can
   pair with a flag setter.  */
11921 if (insn_type != TYPE_SETCC
11922 && insn_type != TYPE_ICMOV
11923 && insn_type != TYPE_FCMOV
11924 && insn_type != TYPE_IBR)
/* Collect DEP_INSN's destination(s): either a single set, or a
   two-element PARALLEL of sets.  */
11927 if ((set = single_set (dep_insn)) != 0)
11929 set = SET_DEST (set);
11932 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11933 && XVECLEN (PATTERN (dep_insn), 0) == 2
11934 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11935 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11937 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* Fixed: the second destination is element 1 of the PARALLEL.  The
   previous code read element 0 twice, making SET2 a duplicate of SET,
   so the SET2 overlap test below contradicted the SET test and the
   PARALLEL case could never report a flags-only dependency.  */
11938 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
/* DEP_INSN's primary destination must be the flags register.  */
11943 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11946 /* This test is true if the dependent insn reads the flags but
11947 not any other potentially set register. */
11948 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11951 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11957 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11958 address with operands set by DEP_INSN. */
11961 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* LEA computes an address in its SET_SRC, so extract it from the
   pattern rather than looking for a MEM operand.  */
11965 if (insn_type == TYPE_LEA
11968 addr = PATTERN (insn);
11969 if (GET_CODE (addr) == SET)
11971 else if (GET_CODE (addr) == PARALLEL
11972 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11973 addr = XVECEXP (addr, 0, 0);
11976 addr = SET_SRC (addr);
/* Otherwise take the address of the first MEM operand found.  */
11981 extract_insn_cached (insn);
11982 for (i = recog_data.n_operands - 1; i >= 0; --i)
11983 if (GET_CODE (recog_data.operand[i]) == MEM)
11985 addr = XEXP (recog_data.operand[i], 0);
/* AGI exists iff DEP_INSN writes something the address reads.  */
11992 return modified_in_p (addr, dep_insn);
/* TARGET_SCHED_ADJUST_COST hook: return the adjusted latency COST of the
   dependency LINK between DEP_INSN (producer) and INSN (consumer),
   specialized per tuning target.  NOTE(review): several statement bodies
   and 'break's are elided in this excerpt.  */
11996 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11998 enum attr_type insn_type, dep_insn_type;
11999 enum attr_memory memory, dep_memory;
12001 int dep_insn_code_number;
12003 /* Anti and output dependencies have zero cost on all CPUs. */
12004 if (REG_NOTE_KIND (link) != 0)
12007 dep_insn_code_number = recog_memoized (dep_insn);
12009 /* If we can't recognize the insns, we can't really do anything. */
12010 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12013 insn_type = get_attr_type (insn);
12014 dep_insn_type = get_attr_type (dep_insn);
/* Per-CPU adjustments (switch on tuning target, opener elided).  */
12018 case PROCESSOR_PENTIUM:
12019 /* Address Generation Interlock adds a cycle of latency. */
12020 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12023 /* ??? Compares pair with jump/setcc. */
12024 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12027 /* Floating point stores require value to be ready one cycle earlier. */
12028 if (insn_type == TYPE_FMOV
12029 && get_attr_memory (insn) == MEMORY_STORE
12030 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12034 case PROCESSOR_PENTIUMPRO:
12035 memory = get_attr_memory (insn);
12036 dep_memory = get_attr_memory (dep_insn);
12038 /* Since we can't represent delayed latencies of load+operation,
12039 increase the cost here for non-imov insns. */
12040 if (dep_insn_type != TYPE_IMOV
12041 && dep_insn_type != TYPE_FMOV
12042 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12045 /* INT->FP conversion is expensive. */
12046 if (get_attr_fp_int_src (dep_insn))
12049 /* There is one cycle extra latency between an FP op and a store. */
12050 if (insn_type == TYPE_FMOV
12051 && (set = single_set (dep_insn)) != NULL_RTX
12052 && (set2 = single_set (insn)) != NULL_RTX
12053 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12054 && GET_CODE (SET_DEST (set2)) == MEM)
12057 /* Show ability of reorder buffer to hide latency of load by executing
12058 in parallel with previous instruction in case
12059 previous instruction is not needed to compute the address. */
12060 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12061 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12063 /* Claim moves to take one cycle, as core can issue one load
12064 at time and the next load can start cycle later. */
12065 if (dep_insn_type == TYPE_IMOV
12066 || dep_insn_type == TYPE_FMOV)
/* K6 (case label elided in this excerpt -- TODO confirm).  */
12074 memory = get_attr_memory (insn);
12075 dep_memory = get_attr_memory (dep_insn);
12076 /* The esp dependency is resolved before the instruction is really
12078 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12079 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12082 /* Since we can't represent delayed latencies of load+operation,
12083 increase the cost here for non-imov insns. */
12084 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12085 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12087 /* INT->FP conversion is expensive. */
12088 if (get_attr_fp_int_src (dep_insn))
12091 /* Show ability of reorder buffer to hide latency of load by executing
12092 in parallel with previous instruction in case
12093 previous instruction is not needed to compute the address. */
12094 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12095 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12097 /* Claim moves to take one cycle, as core can issue one load
12098 at time and the next load can start cycle later. */
12099 if (dep_insn_type == TYPE_IMOV
12100 || dep_insn_type == TYPE_FMOV)
12109 case PROCESSOR_ATHLON:
12111 memory = get_attr_memory (insn);
12112 dep_memory = get_attr_memory (dep_insn);
12114 /* Show ability of reorder buffer to hide latency of load by executing
12115 in parallel with previous instruction in case
12116 previous instruction is not needed to compute the address. */
12117 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12118 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12120 enum attr_unit unit = get_attr_unit (insn);
12123 /* Because of the difference between the length of integer and
12124 floating unit pipeline preparation stages, the memory operands
12125 for floating point are cheaper.
12127 ??? For Athlon it the difference is most probably 2. */
12128 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12131 loadcost = TARGET_ATHLON ? 2 : 0;
12133 if (cost >= loadcost)
/* Per-cycle PPro scheduling state (fields partially elided here).  */
12148 struct ppro_sched_data
12151 int issued_this_cycle;
12155 static enum attr_ppro_uops
/* Return INSN's ppro_uops attribute, defaulting to PPRO_UOPS_MANY for
   insns the recognizer cannot identify (safe worst case).  */
12156 ix86_safe_ppro_uops (rtx insn)
12158 if (recog_memoized (insn) >= 0)
12159 return get_attr_ppro_uops (insn);
12161 return PPRO_UOPS_MANY;
/* Dump the current PPro decode packet (up to three insn UIDs) to DUMP
   for scheduler debugging; no-op when decoder 0 is empty.  */
12165 ix86_dump_ppro_packet (FILE *dump)
12167 if (ix86_sched_data.ppro.decode[0])
12169 fprintf (dump, "PPRO packet: %d",
12170 INSN_UID (ix86_sched_data.ppro.decode[0]));
/* Decoders 1 and 2 are only printed when occupied.  */
12171 if (ix86_sched_data.ppro.decode[1])
12172 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12173 if (ix86_sched_data.ppro.decode[2])
12174 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12175 fputc ('\n', dump);
12179 /* We're beginning a new block. Initialize data structures as necessary. */
12182 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12183 int sched_verbose ATTRIBUTE_UNUSED,
12184 int veclen ATTRIBUTE_UNUSED)
/* All scheduler state (decode slots, issue counts) starts at zero.  */
12186 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12189 /* Shift INSN to SLOT, and shift everything else down. */
12192 ix86_reorder_insn (rtx *insnp, rtx *slot)
/* Slide each entry one position toward INSNP until SLOT is reached;
   the saved insn (elided line) presumably lands in *SLOT.  */
12198 insnp[0] = insnp[1];
12199 while (++insnp != slot);
/* Reorder the ready queue (READY..E_READY, E_READY is highest priority)
   to match the PPro 4-1-1 decoder template: one complex insn plus up to
   two single-uop insns per cycle.  */
12205 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12208 enum attr_ppro_uops cur_uops;
12209 int issued_this_cycle;
12213 /* At this point .ppro.decode contains the state of the three
12214 decoders from last "cycle". That is, those insns that were
12215 actually independent. But here we're scheduling for the
12216 decoder, and we may find things that are decodable in the
12219 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12220 issued_this_cycle = 0;
12223 cur_uops = ix86_safe_ppro_uops (*insnp);
12225 /* If the decoders are empty, and we've a complex insn at the
12226 head of the priority queue, let it issue without complaint. */
12227 if (decode[0] == NULL)
12229 if (cur_uops == PPRO_UOPS_MANY)
12231 decode[0] = *insnp;
12235 /* Otherwise, search for a 2-4 uop unsn to issue. */
/* Walk down the queue from highest priority toward READY.  */
12236 while (cur_uops != PPRO_UOPS_FEW)
12238 if (insnp == ready)
12240 cur_uops = ix86_safe_ppro_uops (*--insnp);
12243 /* If so, move it to the head of the line. */
12244 if (cur_uops == PPRO_UOPS_FEW)
12245 ix86_reorder_insn (insnp, e_ready);
12247 /* Issue the head of the queue. */
12248 issued_this_cycle = 1;
12249 decode[0] = *e_ready--;
12252 /* Look for simple insns to fill in the other two slots. */
12253 for (i = 1; i < 3; ++i)
12254 if (decode[i] == NULL)
12256 if (ready > e_ready)
12260 cur_uops = ix86_safe_ppro_uops (*insnp);
12261 while (cur_uops != PPRO_UOPS_ONE)
12263 if (insnp == ready)
12265 cur_uops = ix86_safe_ppro_uops (*--insnp);
12268 /* Found one. Move it to the head of the queue and issue it. */
12269 if (cur_uops == PPRO_UOPS_ONE)
12271 ix86_reorder_insn (insnp, e_ready);
12272 decode[i] = *e_ready--;
12273 issued_this_cycle++;
12277 /* ??? Didn't find one. Ideally, here we would do a lazy split
12278 of 2-uop insns, issue one and queue the other. */
/* Always report at least one issue so the variable-issue counter in
   ix86_variable_issue never starts at zero.  */
12282 if (issued_this_cycle == 0)
12283 issued_this_cycle = 1;
12284 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12287 /* We are about to being issuing insns for this clock cycle.
12288 Override the default sort algorithm to better slot instructions. */
12290 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12291 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12292 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12294 int n_ready = *n_readyp;
12295 rtx *e_ready = ready + n_ready - 1;
12297 /* Make sure to go ahead and initialize key items in
12298 ix86_sched_data if we are not going to bother trying to
12299 reorder the ready queue. */
12302 ix86_sched_data.ppro.issued_this_cycle = 1;
/* Only PentiumPro gets a custom reorder; other targets (cases elided)
   keep the default order.  */
12311 case PROCESSOR_PENTIUMPRO:
12312 ix86_sched_reorder_ppro (ready, e_ready);
12317 return ix86_issue_rate ();
12320 /* We are about to issue INSN. Return the number of insns left on the
12321 ready queue that can be issued this cycle. */
12324 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12325 int can_issue_more)
/* Default (non-PPro) case: simple countdown.  */
12331 return can_issue_more - 1;
12333 case PROCESSOR_PENTIUMPRO:
12335 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
/* A many-uop insn occupies the whole decode group: flush the current
   packet and either start a new one with INSN or leave it empty
   (the distinguishing condition between the two arms is elided).  */
12337 if (uops == PPRO_UOPS_MANY)
12340 ix86_dump_ppro_packet (dump);
12341 ix86_sched_data.ppro.decode[0] = insn;
12342 ix86_sched_data.ppro.decode[1] = NULL;
12343 ix86_sched_data.ppro.decode[2] = NULL;
12345 ix86_dump_ppro_packet (dump);
12346 ix86_sched_data.ppro.decode[0] = NULL;
/* A 2-4 uop insn must go through decoder 0: start a fresh packet.  */
12348 else if (uops == PPRO_UOPS_FEW)
12351 ix86_dump_ppro_packet (dump);
12352 ix86_sched_data.ppro.decode[0] = insn;
12353 ix86_sched_data.ppro.decode[1] = NULL;
12354 ix86_sched_data.ppro.decode[2] = NULL;
/* Single-uop insn: take the first free decoder slot.  */
12358 for (i = 0; i < 3; ++i)
12359 if (ix86_sched_data.ppro.decode[i] == NULL)
12361 ix86_sched_data.ppro.decode[i] = insn;
/* No free slot: the packet is full, dump and reset it.  */
12369 ix86_dump_ppro_packet (dump);
12370 ix86_sched_data.ppro.decode[0] = NULL;
12371 ix86_sched_data.ppro.decode[1] = NULL;
12372 ix86_sched_data.ppro.decode[2] = NULL;
12376 return --ix86_sched_data.ppro.issued_this_cycle;
/* Hook: nonzero when the tuning target has a DFA pipeline description
   (Pentium and Athlon/K8 here); return values are elided in this excerpt.  */
12381 ia32_use_dfa_pipeline_interface (void)
12383 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12388 /* How many alternative schedules to try. This should be as wide as the
12389 scheduling freedom in the DFA, but no wider. Making this value too
12390 large results extra work for the scheduler. */
12393 ia32_multipass_dfa_lookahead (void)
/* Pentium gets a non-default lookahead; the values are elided here.  */
12395 if (ix86_tune == PROCESSOR_PENTIUM)
12402 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12403 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12407 ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
/* Process each insn in the sequence via the recursive helper below.  */
12412 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12414 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12418 /* Subroutine of above to actually do the updating by recursively walking
12422 ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
12425 enum rtx_code code = GET_CODE (x);
12426 const char *format_ptr = GET_RTX_FORMAT (code);
/* Copy the alias/attribute info onto MEMs addressed directly by the
   destination or source register (pointer identity test).  */
12429 if (code == MEM && XEXP (x, 0) == dstreg)
12430 MEM_COPY_ATTRIBUTES (x, dstref);
12431 else if (code == MEM && XEXP (x, 0) == srcreg)
12432 MEM_COPY_ATTRIBUTES (x, srcref);
/* Recurse over every rtx ('e') and rtx-vector ('E') operand of X.  */
12434 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12436 if (*format_ptr == 'e')
12437 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12439 else if (*format_ptr == 'E')
12440 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12441 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12446 /* Compute the alignment given to a constant that is being placed in memory.
12447 EXP is the constant and ALIGN is the alignment that the object would
12449 The value of this function is used instead of that alignment to align
12453 ix86_constant_alignment (tree exp, int align)
/* Promote doubles to 64-bit and 128-bit-mode constants to 128-bit
   alignment; fall back to ALIGN otherwise (returns elided here).  */
12455 if (TREE_CODE (exp) == REAL_CST)
12457 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12459 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants also get boosted alignment (threshold 31).  */
12462 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12469 /* Compute the alignment for a static variable.
12470 TYPE is the data type, and ALIGN is the alignment that
12471 the object would ordinarily have. The value of this function is used
12472 instead of that alignment to align the object. */
12475 ix86_data_alignment (tree type, int align)
/* Aggregates of 256 bits or more get at least 256-bit alignment
   (the HIGH test catches sizes that overflow the low word).  */
12477 if (AGGREGATE_TYPE_P (type)
12478 && TYPE_SIZE (type)
12479 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12480 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12481 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12484 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12485 to 16byte boundary. */
12488 if (AGGREGATE_TYPE_P (type)
12489 && TYPE_SIZE (type)
12490 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12491 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12492 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Element-type-driven boosts for arrays, complex, records, scalars.  */
12496 if (TREE_CODE (type) == ARRAY_TYPE)
12498 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12500 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12503 else if (TREE_CODE (type) == COMPLEX_TYPE)
12506 if (TYPE_MODE (type) == DCmode && align < 64)
12508 if (TYPE_MODE (type) == XCmode && align < 128)
/* For records/unions, key off the first field's mode.  */
12511 else if ((TREE_CODE (type) == RECORD_TYPE
12512 || TREE_CODE (type) == UNION_TYPE
12513 || TREE_CODE (type) == QUAL_UNION_TYPE)
12514 && TYPE_FIELDS (type))
12516 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12518 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12521 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12522 || TREE_CODE (type) == INTEGER_TYPE)
12524 if (TYPE_MODE (type) == DFmode && align < 64)
12526 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12533 /* Compute the alignment for a local variable.
12534 TYPE is the data type, and ALIGN is the alignment that
12535 the object would ordinarily have. The value of this macro is used
12536 instead of that alignment to align the object. */
12539 ix86_local_alignment (tree type, int align)
12541 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12542 to 16byte boundary. */
12545 if (AGGREGATE_TYPE_P (type)
12546 && TYPE_SIZE (type)
12547 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12548 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12549 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Same mode-driven boosts as ix86_data_alignment, applied to stack
   locals (locals can always take the stricter alignment).  */
12552 if (TREE_CODE (type) == ARRAY_TYPE)
12554 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12556 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12559 else if (TREE_CODE (type) == COMPLEX_TYPE)
12561 if (TYPE_MODE (type) == DCmode && align < 64)
12563 if (TYPE_MODE (type) == XCmode && align < 128)
12566 else if ((TREE_CODE (type) == RECORD_TYPE
12567 || TREE_CODE (type) == UNION_TYPE
12568 || TREE_CODE (type) == QUAL_UNION_TYPE)
12569 && TYPE_FIELDS (type))
12571 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12573 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12576 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12577 || TREE_CODE (type) == INTEGER_TYPE)
12580 if (TYPE_MODE (type) == DFmode && align < 64)
12582 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12588 /* Emit RTL insns to initialize the variable parts of a trampoline.
12589 FNADDR is an RTX for the address of the function's pure code.
12590 CXT is an RTX for the static chain value for the function. */
12592 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* 32-bit trampoline: "movl $cxt,%ecx" (0xb9) then "jmp fnaddr" (0xe9
   with a rel32 displacement computed from the end of the jmp).  */
12596 /* Compute offset from the end of the jmp to the target function. */
12597 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12598 plus_constant (tramp, 10),
12599 NULL_RTX, 1, OPTAB_DIRECT);
12600 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12601 gen_int_mode (0xb9, QImode));
12602 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12603 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12604 gen_int_mode (0xe9, QImode));
12605 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* 64-bit trampoline (else branch; offset bookkeeping partially elided):
   load FNADDR into r11 and CXT into r10, then jmp *r11.  */
12610 /* Try to load address using shorter movl instead of movabs.
12611 We may want to support movq for kernel mode, but kernel does not use
12612 trampolines at the moment. */
12613 if (x86_64_zero_extended_value (fnaddr))
12615 fnaddr = copy_to_mode_reg (DImode, fnaddr);
/* 0x41 0xbb = REX.B + movl $imm32,%r11d (zero-extends into r11).  */
12616 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12617 gen_int_mode (0xbb41, HImode));
12618 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12619 gen_lowpart (SImode, fnaddr));
/* 0x49 0xbb = movabs $imm64,%r11.  */
12624 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12625 gen_int_mode (0xbb49, HImode));
12626 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12630 /* Load static chain using movabs to r10. */
12631 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12632 gen_int_mode (0xba49, HImode));
12633 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12636 /* Jump to the r11 */
12637 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12638 gen_int_mode (0xff49, HImode));
12639 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12640 gen_int_mode (0xe3, QImode));
/* Sanity check: emitted bytes must fit the declared trampoline size.  */
12642 if (offset > TRAMPOLINE_SIZE)
12646 #ifdef TRANSFER_FROM_TRAMPOLINE
/* Some platforms need an explicit call to make the stack executable.  */
12647 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12648 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
/* Register builtin NAME iff its ISA MASK is enabled in target_flags, and
   any MASK_64BIT requirement matches the current target.  */
12652 #define def_builtin(MASK, NAME, TYPE, CODE) \
12654 if ((MASK) & target_flags \
12655 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12656 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12657 NULL, NULL_TREE); \
/* One row of the builtin tables below: ISA mask, insn pattern, user-level
   builtin name, builtin enum, comparison code (0 if none), and a flag
   (used by expanders, e.g. to swap operands).  */
12660 struct builtin_description
12662 const unsigned int mask;
12663 const enum insn_code icode;
12664 const char *const name;
12665 const enum ix86_builtins code;
12666 const enum rtx_code comparison;
12667 const unsigned int flag;
/* SSE/SSE2 comiss/ucomiss-family builtins.  UNEQ/UNLT/UNLE/LTGT (rather
   than EQ/LT/LE/NE) encode the unordered-quiet comparison semantics of
   these instructions.  */
12670 static const struct builtin_description bdesc_comi[] =
12672 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12673 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12674 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12675 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12676 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12677 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12678 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12679 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12680 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12681 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12682 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12683 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12684 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12685 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12686 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12687 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12688 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12689 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12690 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12691 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12692 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12693 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12694 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12695 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12698 static const struct builtin_description bdesc_2arg[] =
12701 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12702 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12703 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12704 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12705 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12706 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12707 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12708 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12710 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12711 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12712 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12713 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12714 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12715 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12716 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12717 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12718 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12719 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12720 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12721 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12722 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12723 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12724 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12725 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12726 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12727 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12728 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12729 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12731 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12732 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12733 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12734 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12736 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12737 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12738 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12739 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12741 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12742 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12743 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12744 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12745 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12748 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12749 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12750 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12751 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12752 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12753 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12754 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12755 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12757 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12758 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12759 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12760 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12761 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12762 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12763 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12764 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12766 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12767 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12768 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12770 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12771 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12772 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12773 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12775 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12776 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12778 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12779 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12780 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12781 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12782 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12783 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12785 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12786 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12787 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12788 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12790 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12791 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12792 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12793 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12794 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12795 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12798 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12799 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12800 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12802 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12803 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12804 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12806 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12807 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12808 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12809 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12810 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12811 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12813 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12814 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12815 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12816 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12817 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12818 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12820 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12821 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12822 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12823 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12825 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12826 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12829 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12830 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12831 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12832 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12833 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12834 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12835 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12836 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12838 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12839 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12840 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12841 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12842 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12843 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12844 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12845 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12846 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12847 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12848 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12849 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12850 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12851 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12852 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12853 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12854 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12855 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12856 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12857 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12859 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12860 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12861 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12862 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12864 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12865 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12869 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12870 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12874 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12881 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
/* SSE2 128-bit saturating add/subtract.  These expand to XMM (SSE2)
   instructions, so they must be gated on MASK_SSE2 like the other
   *128 entries above; the previous MASK_MMX gating wrongly made them
   available under -mmmx without SSE2.  */
12883 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12888 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
/* SSE2 multiplies.  */
12892 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
/* SSE2 full-width bitwise logic.  */
12897 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
/* SSE2 unsigned averages.  */
12902 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12903 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
/* SSE2 element-wise comparisons.  */
12905 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12908 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12909 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12910 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
/* SSE2 integer min/max.  */
12912 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12914 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12915 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
/* SSE2 unpack/interleave.  */
12917 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12918 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12919 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12920 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12921 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12922 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12924 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
/* SSE2 packs with saturation.  */
12926 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12927 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12928 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12930 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12931 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
/* SSE2 shifts: _ti patterns take the count in an XMM register,
   plain patterns take an immediate count (the *I builtins).  */
12933 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12934 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12935 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12936 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12938 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
/* SSE2 scalar conversions.  */
12954 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12955 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
/* SSE3 (prescott new instructions) horizontal/asymmetric FP ops.  */
12960 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12961 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12962 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12963 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12964 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12965 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
/* Builtins taking a single vector/scalar operand; entries with a null
   name are registered with explicit types later in
   ix86_init_mmx_sse_builtins.  */
12968 static const struct builtin_description bdesc_1arg[] =
/* Move-mask extractions.  */
12970 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12971 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
/* SSE square root / reciprocal approximations.  */
12973 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12974 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12975 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
/* SSE float->int conversions (cvtt* truncate).  */
12977 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12978 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12979 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12980 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12981 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12982 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
/* SSE2 move-mask and MMX<->XMM register moves.  */
12984 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
/* SSE2 conversions.  */
12991 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12994 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12995 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12996 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12997 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12998 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13002 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13004 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13005 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13007 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13011 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
/* SSE3 (prescott new instructions) duplicating moves.  */
13014 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13015 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13016 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
/* TARGET_INIT_BUILTINS hook: delegates all i386 builtin registration
   to the MMX/SSE initializer below.  */
13020 ix86_init_builtins (void)
13023   ix86_init_mmx_sse_builtins ();
13026 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13027 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13030 ix86_init_mmx_sse_builtins (void)
13032   const struct builtin_description * d;
/* Pointer types used in the load/store builtin signatures.  */
13035   tree pchar_type_node = build_pointer_type (char_type_node);
13036   tree pcchar_type_node = build_pointer_type (
13037 build_type_variant (char_type_node, 1, 0));
13038   tree pfloat_type_node = build_pointer_type (float_type_node);
13039   tree pcfloat_type_node = build_pointer_type (
13040 build_type_variant (float_type_node, 1, 0));
13041   tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13042   tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13043   tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
/* Function types for the comparison builtins.  */
13046   tree int_ftype_v4sf_v4sf
13047 = build_function_type_list (integer_type_node,
13048 V4SF_type_node, V4SF_type_node, NULL_TREE);
13049   tree v4si_ftype_v4sf_v4sf
13050 = build_function_type_list (V4SI_type_node,
13051 V4SF_type_node, V4SF_type_node, NULL_TREE);
13052 /* MMX/SSE/integer conversions. */
13053   tree int_ftype_v4sf
13054 = build_function_type_list (integer_type_node,
13055 V4SF_type_node, NULL_TREE);
13056   tree int64_ftype_v4sf
13057 = build_function_type_list (long_long_integer_type_node,
13058 V4SF_type_node, NULL_TREE);
13059   tree int_ftype_v8qi
13060 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13061   tree v4sf_ftype_v4sf_int
13062 = build_function_type_list (V4SF_type_node,
13063 V4SF_type_node, integer_type_node, NULL_TREE);
13064   tree v4sf_ftype_v4sf_int64
13065 = build_function_type_list (V4SF_type_node,
13066 V4SF_type_node, long_long_integer_type_node,
13068   tree v4sf_ftype_v4sf_v2si
13069 = build_function_type_list (V4SF_type_node,
13070 V4SF_type_node, V2SI_type_node, NULL_TREE);
13071   tree int_ftype_v4hi_int
13072 = build_function_type_list (integer_type_node,
13073 V4HI_type_node, integer_type_node, NULL_TREE);
13074   tree v4hi_ftype_v4hi_int_int
13075 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13076 integer_type_node, integer_type_node,
13078 /* Miscellaneous. */
13079   tree v8qi_ftype_v4hi_v4hi
13080 = build_function_type_list (V8QI_type_node,
13081 V4HI_type_node, V4HI_type_node, NULL_TREE);
13082   tree v4hi_ftype_v2si_v2si
13083 = build_function_type_list (V4HI_type_node,
13084 V2SI_type_node, V2SI_type_node, NULL_TREE);
13085   tree v4sf_ftype_v4sf_v4sf_int
13086 = build_function_type_list (V4SF_type_node,
13087 V4SF_type_node, V4SF_type_node,
13088 integer_type_node, NULL_TREE);
13089   tree v2si_ftype_v4hi_v4hi
13090 = build_function_type_list (V2SI_type_node,
13091 V4HI_type_node, V4HI_type_node, NULL_TREE);
13092   tree v4hi_ftype_v4hi_int
13093 = build_function_type_list (V4HI_type_node,
13094 V4HI_type_node, integer_type_node, NULL_TREE);
13095   tree v4hi_ftype_v4hi_di
13096 = build_function_type_list (V4HI_type_node,
13097 V4HI_type_node, long_long_unsigned_type_node,
13099   tree v2si_ftype_v2si_di
13100 = build_function_type_list (V2SI_type_node,
13101 V2SI_type_node, long_long_unsigned_type_node,
13103   tree void_ftype_void
13104 = build_function_type (void_type_node, void_list_node);
13105   tree void_ftype_unsigned
13106 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13107   tree void_ftype_unsigned_unsigned
13108 = build_function_type_list (void_type_node, unsigned_type_node,
13109 unsigned_type_node, NULL_TREE);
13110   tree void_ftype_pcvoid_unsigned_unsigned
13111 = build_function_type_list (void_type_node, const_ptr_type_node,
13112 unsigned_type_node, unsigned_type_node,
13114   tree unsigned_ftype_void
13115 = build_function_type (unsigned_type_node, void_list_node);
13117 = build_function_type (long_long_unsigned_type_node, void_list_node);
13118   tree v4sf_ftype_void
13119 = build_function_type (V4SF_type_node, void_list_node);
13120   tree v2si_ftype_v4sf
13121 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13122 /* Loads/stores. */
13123   tree void_ftype_v8qi_v8qi_pchar
13124 = build_function_type_list (void_type_node,
13125 V8QI_type_node, V8QI_type_node,
13126 pchar_type_node, NULL_TREE);
13127   tree v4sf_ftype_pcfloat
13128 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13129 /* @@@ the type is bogus */
13130   tree v4sf_ftype_v4sf_pv2si
13131 = build_function_type_list (V4SF_type_node,
13132 V4SF_type_node, pv2si_type_node, NULL_TREE);
13133   tree void_ftype_pv2si_v4sf
13134 = build_function_type_list (void_type_node,
13135 pv2si_type_node, V4SF_type_node, NULL_TREE);
13136   tree void_ftype_pfloat_v4sf
13137 = build_function_type_list (void_type_node,
13138 pfloat_type_node, V4SF_type_node, NULL_TREE);
13139   tree void_ftype_pdi_di
13140 = build_function_type_list (void_type_node,
13141 pdi_type_node, long_long_unsigned_type_node,
13143   tree void_ftype_pv2di_v2di
13144 = build_function_type_list (void_type_node,
13145 pv2di_type_node, V2DI_type_node, NULL_TREE);
13146 /* Normal vector unops. */
13147   tree v4sf_ftype_v4sf
13148 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13150 /* Normal vector binops. */
13151   tree v4sf_ftype_v4sf_v4sf
13152 = build_function_type_list (V4SF_type_node,
13153 V4SF_type_node, V4SF_type_node, NULL_TREE);
13154   tree v8qi_ftype_v8qi_v8qi
13155 = build_function_type_list (V8QI_type_node,
13156 V8QI_type_node, V8QI_type_node, NULL_TREE);
13157   tree v4hi_ftype_v4hi_v4hi
13158 = build_function_type_list (V4HI_type_node,
13159 V4HI_type_node, V4HI_type_node, NULL_TREE);
13160   tree v2si_ftype_v2si_v2si
13161 = build_function_type_list (V2SI_type_node,
13162 V2SI_type_node, V2SI_type_node, NULL_TREE);
13163   tree di_ftype_di_di
13164 = build_function_type_list (long_long_unsigned_type_node,
13165 long_long_unsigned_type_node,
13166 long_long_unsigned_type_node, NULL_TREE);
/* 3dNow! types (V2SF).  */
13168   tree v2si_ftype_v2sf
13169 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13170   tree v2sf_ftype_v2si
13171 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13172   tree v2si_ftype_v2si
13173 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13174   tree v2sf_ftype_v2sf
13175 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13176   tree v2sf_ftype_v2sf_v2sf
13177 = build_function_type_list (V2SF_type_node,
13178 V2SF_type_node, V2SF_type_node, NULL_TREE);
13179   tree v2si_ftype_v2sf_v2sf
13180 = build_function_type_list (V2SI_type_node,
13181 V2SF_type_node, V2SF_type_node, NULL_TREE);
/* SSE2 pointer and function types.  */
13182   tree pint_type_node = build_pointer_type (integer_type_node);
13183   tree pcint_type_node = build_pointer_type (
13184 build_type_variant (integer_type_node, 1, 0));
13185   tree pdouble_type_node = build_pointer_type (double_type_node);
13186   tree pcdouble_type_node = build_pointer_type (
13187 build_type_variant (double_type_node, 1, 0));
13188   tree int_ftype_v2df_v2df
13189 = build_function_type_list (integer_type_node,
13190 V2DF_type_node, V2DF_type_node, NULL_TREE);
13193 = build_function_type (intTI_type_node, void_list_node);
13194   tree v2di_ftype_void
13195 = build_function_type (V2DI_type_node, void_list_node);
13196   tree ti_ftype_ti_ti
13197 = build_function_type_list (intTI_type_node,
13198 intTI_type_node, intTI_type_node, NULL_TREE);
13199   tree void_ftype_pcvoid
13200 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13202 = build_function_type_list (V2DI_type_node,
13203 long_long_unsigned_type_node, NULL_TREE);
13205 = build_function_type_list (long_long_unsigned_type_node,
13206 V2DI_type_node, NULL_TREE);
13207   tree v4sf_ftype_v4si
13208 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13209   tree v4si_ftype_v4sf
13210 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13211   tree v2df_ftype_v4si
13212 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13213   tree v4si_ftype_v2df
13214 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13215   tree v2si_ftype_v2df
13216 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13217   tree v4sf_ftype_v2df
13218 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13219   tree v2df_ftype_v2si
13220 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13221   tree v2df_ftype_v4sf
13222 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13223   tree int_ftype_v2df
13224 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13225   tree int64_ftype_v2df
13226 = build_function_type_list (long_long_integer_type_node,
13227 V2DF_type_node, NULL_TREE);
13228   tree v2df_ftype_v2df_int
13229 = build_function_type_list (V2DF_type_node,
13230 V2DF_type_node, integer_type_node, NULL_TREE);
13231   tree v2df_ftype_v2df_int64
13232 = build_function_type_list (V2DF_type_node,
13233 V2DF_type_node, long_long_integer_type_node,
13235   tree v4sf_ftype_v4sf_v2df
13236 = build_function_type_list (V4SF_type_node,
13237 V4SF_type_node, V2DF_type_node, NULL_TREE);
13238   tree v2df_ftype_v2df_v4sf
13239 = build_function_type_list (V2DF_type_node,
13240 V2DF_type_node, V4SF_type_node, NULL_TREE);
13241   tree v2df_ftype_v2df_v2df_int
13242 = build_function_type_list (V2DF_type_node,
13243 V2DF_type_node, V2DF_type_node,
13246   tree v2df_ftype_v2df_pv2si
13247 = build_function_type_list (V2DF_type_node,
13248 V2DF_type_node, pv2si_type_node, NULL_TREE);
13249   tree void_ftype_pv2si_v2df
13250 = build_function_type_list (void_type_node,
13251 pv2si_type_node, V2DF_type_node, NULL_TREE);
13252   tree void_ftype_pdouble_v2df
13253 = build_function_type_list (void_type_node,
13254 pdouble_type_node, V2DF_type_node, NULL_TREE);
13255   tree void_ftype_pint_int
13256 = build_function_type_list (void_type_node,
13257 pint_type_node, integer_type_node, NULL_TREE);
13258   tree void_ftype_v16qi_v16qi_pchar
13259 = build_function_type_list (void_type_node,
13260 V16QI_type_node, V16QI_type_node,
13261 pchar_type_node, NULL_TREE);
13262   tree v2df_ftype_pcdouble
13263 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13264   tree v2df_ftype_v2df_v2df
13265 = build_function_type_list (V2DF_type_node,
13266 V2DF_type_node, V2DF_type_node, NULL_TREE);
13267   tree v16qi_ftype_v16qi_v16qi
13268 = build_function_type_list (V16QI_type_node,
13269 V16QI_type_node, V16QI_type_node, NULL_TREE);
13270   tree v8hi_ftype_v8hi_v8hi
13271 = build_function_type_list (V8HI_type_node,
13272 V8HI_type_node, V8HI_type_node, NULL_TREE);
13273   tree v4si_ftype_v4si_v4si
13274 = build_function_type_list (V4SI_type_node,
13275 V4SI_type_node, V4SI_type_node, NULL_TREE);
13276   tree v2di_ftype_v2di_v2di
13277 = build_function_type_list (V2DI_type_node,
13278 V2DI_type_node, V2DI_type_node, NULL_TREE);
13279   tree v2di_ftype_v2df_v2df
13280 = build_function_type_list (V2DI_type_node,
13281 V2DF_type_node, V2DF_type_node, NULL_TREE);
13282   tree v2df_ftype_v2df
13283 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13284   tree v2df_ftype_double
13285 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13286   tree v2df_ftype_double_double
13287 = build_function_type_list (V2DF_type_node,
13288 double_type_node, double_type_node, NULL_TREE);
13289   tree int_ftype_v8hi_int
13290 = build_function_type_list (integer_type_node,
13291 V8HI_type_node, integer_type_node, NULL_TREE);
13292   tree v8hi_ftype_v8hi_int_int
13293 = build_function_type_list (V8HI_type_node,
13294 V8HI_type_node, integer_type_node,
13295 integer_type_node, NULL_TREE);
13296   tree v2di_ftype_v2di_int
13297 = build_function_type_list (V2DI_type_node,
13298 V2DI_type_node, integer_type_node, NULL_TREE);
13299   tree v4si_ftype_v4si_int
13300 = build_function_type_list (V4SI_type_node,
13301 V4SI_type_node, integer_type_node, NULL_TREE);
13302   tree v8hi_ftype_v8hi_int
13303 = build_function_type_list (V8HI_type_node,
13304 V8HI_type_node, integer_type_node, NULL_TREE);
13305   tree v8hi_ftype_v8hi_v2di
13306 = build_function_type_list (V8HI_type_node,
13307 V8HI_type_node, V2DI_type_node, NULL_TREE);
13308   tree v4si_ftype_v4si_v2di
13309 = build_function_type_list (V4SI_type_node,
13310 V4SI_type_node, V2DI_type_node, NULL_TREE);
13311   tree v4si_ftype_v8hi_v8hi
13312 = build_function_type_list (V4SI_type_node,
13313 V8HI_type_node, V8HI_type_node, NULL_TREE);
13314   tree di_ftype_v8qi_v8qi
13315 = build_function_type_list (long_long_unsigned_type_node,
13316 V8QI_type_node, V8QI_type_node, NULL_TREE);
13317   tree v2di_ftype_v16qi_v16qi
13318 = build_function_type_list (V2DI_type_node,
13319 V16QI_type_node, V16QI_type_node, NULL_TREE);
13320   tree int_ftype_v16qi
13321 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13322   tree v16qi_ftype_pcchar
13323 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13324   tree void_ftype_pchar_v16qi
13325 = build_function_type_list (void_type_node,
13326 pchar_type_node, V16QI_type_node, NULL_TREE);
13327   tree v4si_ftype_pcint
13328 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13329   tree void_ftype_pcint_v4si
13330 = build_function_type_list (void_type_node,
13331 pcint_type_node, V4SI_type_node, NULL_TREE);
13332   tree v2di_ftype_v2di
13333 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13336   tree float128_type;
13338 /* The __float80 type. */
13339   if (TYPE_MODE (long_double_type_node) == XFmode)
13340 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
/* Otherwise build a distinct REAL_TYPE for __float80.  NOTE(review):
   precision 96 here matches ia32 XFmode storage; confirm against the
   current backend if changing.  */
13344 /* The __float80 type. */
13345       float80_type = make_node (REAL_TYPE);
13346       TYPE_PRECISION (float80_type) = 96;
13347       layout_type (float80_type);
13348       (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
/* __float128 is always a distinct 128-bit REAL_TYPE.  */
13351   float128_type = make_node (REAL_TYPE);
13352   TYPE_PRECISION (float128_type) = 128;
13353   layout_type (float128_type);
13354   (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13356 /* Add all builtins that are more or less simple operations on two
13358   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13360 /* Use one of the operands; the target can have a different mode for
13361 mask-generating compares. */
13362       enum machine_mode mode;
13367       mode = insn_data[d->icode].operand[1].mode;
/* Pick the function type from the operand mode.  */
13372 type = v16qi_ftype_v16qi_v16qi;
13375 type = v8hi_ftype_v8hi_v8hi;
13378 type = v4si_ftype_v4si_v4si;
13381 type = v2di_ftype_v2di_v2di;
13384 type = v2df_ftype_v2df_v2df;
13387 type = ti_ftype_ti_ti;
13390 type = v4sf_ftype_v4sf_v4sf;
13393 type = v8qi_ftype_v8qi_v8qi;
13396 type = v4hi_ftype_v4hi_v4hi;
13399 type = v2si_ftype_v2si_v2si;
13402 type = di_ftype_di_di;
13409 /* Override for comparisons. */
13410       if (d->icode == CODE_FOR_maskcmpv4sf3
13411 || d->icode == CODE_FOR_maskncmpv4sf3
13412 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13413 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13414 type = v4si_ftype_v4sf_v4sf;
13416       if (d->icode == CODE_FOR_maskcmpv2df3
13417 || d->icode == CODE_FOR_maskncmpv2df3
13418 || d->icode == CODE_FOR_vmmaskcmpv2df3
13419 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13420 type = v2di_ftype_v2df_v2df;
13422       def_builtin (d->mask, d->name, type, d->code);
13425 /* Add the remaining MMX insns with somewhat more complicated types. */
13426   def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13427   def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13428 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13429 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13430 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13432 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13433 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13434 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13436 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13437 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13439 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13440 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13442 /* comi/ucomi insns. */
13443 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13444 if (d->mask == MASK_SSE2)
13445 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13447 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13449 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13450 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13451 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13453 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13454 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13455 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13456 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13457 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13458 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13459 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13460 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13461 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13462 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13463 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13465 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13466 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13468 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13470 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13471 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13472 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13473 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13474 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13475 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13477 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13478 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13479 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13480 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13482 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13483 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13484 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13485 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13487 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13489 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13491 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13492 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13493 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13494 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13495 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13496 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13498 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13500 /* Original 3DNow! */
13501 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13502 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13503 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13504 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13505 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13506 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13507 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13508 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13509 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13510 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13511 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13512 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13513 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13514 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13515 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13516 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13517 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13518 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13519 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13520 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13522 /* 3DNow! extension as used in the Athlon CPU. */
13523 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13524 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13525 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13526 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13527 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13528 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13530 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13536 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13537 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13538 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13540 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13541 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13542 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13543 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13547 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13548 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13549 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13552 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13553 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13558 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13559 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13561 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13563 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13566 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13568 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13571 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13572 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13573 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13574 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13577 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13581 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13582 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13584 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13586 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13588 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13589 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13590 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13591 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13595 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13597 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13598 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13599 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13601 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13603 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13605 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13608 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13609 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13610 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13611 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13613 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13615 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13616 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13617 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13619 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13620 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13621 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13623 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13626 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13627 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13628 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13632 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13633 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13634 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13636 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13637 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13639 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13641 /* Prescott New Instructions. */
13642 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13643 void_ftype_pcvoid_unsigned_unsigned,
13644 IX86_BUILTIN_MONITOR);
13645 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13646 void_ftype_unsigned_unsigned,
13647 IX86_BUILTIN_MWAIT);
13648 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13650 IX86_BUILTIN_MOVSHDUP);
13651 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13653 IX86_BUILTIN_MOVSLDUP);
13654 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13655 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13656 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13657 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13658 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13659 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13662 /* Errors in the source file can cause expand_expr to return const0_rtx
13663 where we expect a vector. To avoid crashing, use one of the vector
13664 clear instructions. */
/* Return X unchanged if it is a usable vector operand; if X is const0_rtx
   (which expand_expr can produce after a source-level error), materialize
   a freshly cleared register of MODE instead so the SSE/MMX expanders do
   not crash on a non-vector rtx.
   NOTE(review): extraction dropped the "static rtx" line, the braces and
   the trailing "return x;" of this function -- restore from pristine
   sources before compiling.  */
13666 safe_vector_operand (rtx x, enum machine_mode mode)
13668   if (x != const0_rtx)
/* (dropped line here was presumably "return x;" -- X is fine as-is.)  */
13670   x = gen_reg_rtx (mode);
/* MMX/3DNow! modes are cleared through a DImode view of the register.  */
13672   if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13673     emit_insn (gen_mmx_clrdi (mode == DImode ? x
13674 : gen_rtx_SUBREG (DImode, x, 0)));
/* All other (SSE) vector modes are cleared through a V4SFmode view.
   NOTE(review): the "else" keyword preceding this call was dropped.  */
13676     emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13677 : gen_rtx_SUBREG (V4SFmode, x, 0),
13678 CONST0_RTX (V4SFmode)));
13682 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin.  ICODE is the named insn pattern to
   generate, ARGLIST the call's argument TREE_LIST, TARGET a suggested
   result rtx (reused only when it already has the pattern's result mode
   and satisfies its operand-0 predicate).
   NOTE(review): extraction dropped the "static rtx" line, braces, the
   "rtx pat;" declaration, the head of the target-reuse test, the abort
   path and the final "emit_insn (pat); return target;" sequence.  */
13685 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13688   tree arg0 = TREE_VALUE (arglist);
13689   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13690   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13691   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Modes the named pattern wants for its result and its two inputs.  */
13692   enum machine_mode tmode = insn_data[icode].operand[0].mode;
13693   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13694   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (error recovery).  */
13696   if (VECTOR_MODE_P (mode0))
13697     op0 = safe_vector_operand (op0, mode0);
13698   if (VECTOR_MODE_P (mode1))
13699     op1 = safe_vector_operand (op1, mode1);
/* NOTE(review): the "if (optimize || target == 0" head of this
   condition was dropped by extraction.  */
13702       || GET_MODE (target) != tmode
13703       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13704     target = gen_reg_rtx (tmode);
/* An SImode shift count destined for a TImode operand is widened by
   loading it into a V4SImode register and taking a TImode lowpart.  */
13706   if (GET_MODE (op1) == SImode && mode1 == TImode)
13708       rtx x = gen_reg_rtx (V4SImode);
13709       emit_insn (gen_sse2_loadd (x, op1));
13710       op1 = gen_lowpart (TImode, x);
13713   /* In case the insn wants input operands in modes different from
13714 the result, abort.  */
13715   if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13716       || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
/* Force operands that fail the pattern's predicates into registers.  */
13719   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13720     op0 = copy_to_mode_reg (mode0, op0);
13721   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13722     op1 = copy_to_mode_reg (mode1, op1);
13724   /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13725 yet one of the two must not be a memory.  This is normally enforced
13726 by expanders, but we didn't bother to create one here.  */
13727   if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13728     op0 = copy_to_mode_reg (mode0, op0);
13730   pat = GEN_FCN (icode) (target, op0, op1);
13737 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Expand a store builtin.  ARG0 is the destination pointer, ARG1 the
   value to store; the generated insn's operand 0 is a MEM built from
   ARG0, operand 1 the value forced into a register.
   NOTE(review): extraction dropped the "static rtx" line, braces,
   "rtx pat;", and the final "if (pat) emit_insn (pat); return 0;"
   tail of this function.  */
13740 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13743   tree arg0 = TREE_VALUE (arglist);
13744   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13745   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13746   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13747   enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13748   enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* Guard against const0_rtx standing in for a vector (error recovery).  */
13750   if (VECTOR_MODE_P (mode1))
13751     op1 = safe_vector_operand (op1, mode1);
/* Wrap the pointer argument in a MEM of the mode the pattern stores.  */
13753   op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13754   op1 = copy_to_mode_reg (mode1, op1);
13756   pat = GEN_FCN (icode) (op0, op1);
13762 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expand a one-operand builtin.  When DO_LOAD is nonzero the single
   argument is a pointer and the operand is a MEM built from it;
   otherwise the argument is the operand itself.
   NOTE(review): extraction dropped the "static rtx" line, braces,
   "rtx pat;", the target-reuse head, the if/else around the DO_LOAD
   load vs. the vector-guard path, and the emit/return tail.  */
13765 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13766 rtx target, int do_load)
13769   tree arg0 = TREE_VALUE (arglist);
13770   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13771   enum machine_mode tmode = insn_data[icode].operand[0].mode;
13772   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* NOTE(review): the "if (optimize || target == 0" head was dropped.  */
13775       || GET_MODE (target) != tmode
13776       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13777     target = gen_reg_rtx (tmode);
/* DO_LOAD path: dereference the pointer argument.  */
13779     op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* Non-load path: guard against const0_rtx standing in for a vector.  */
13782       if (VECTOR_MODE_P (mode0))
13783 op0 = safe_vector_operand (op0, mode0);
13785       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13786 op0 = copy_to_mode_reg (mode0, op0);
13789   pat = GEN_FCN (icode) (target, op0);
13796 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13797 sqrtss, rsqrtss, rcpss. */
/* Expand the three special unop builtins (sqrtss, rsqrtss, rcpss) whose
   patterns take the same value twice: the single argument is duplicated
   into both input operands.
   NOTE(review): extraction dropped the "static rtx" line, braces,
   "rtx pat;", the target-reuse head, the "op1 = op0;" duplication line
   (presumably between the two predicate checks), and the emit/return
   tail.  */
13800 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13803   tree arg0 = TREE_VALUE (arglist);
13804   rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13805   enum machine_mode tmode = insn_data[icode].operand[0].mode;
13806   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* NOTE(review): the "if (optimize || target == 0" head was dropped.  */
13809       || GET_MODE (target) != tmode
13810       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13811     target = gen_reg_rtx (tmode);
/* Guard against const0_rtx standing in for a vector (error recovery).  */
13813   if (VECTOR_MODE_P (mode0))
13814     op0 = safe_vector_operand (op0, mode0);
13816   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13817     op0 = copy_to_mode_reg (mode0, op0);
/* Both input operands share MODE0 -- op1 is a copy of op0.  */
13820   if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13821     op1 = copy_to_mode_reg (mode0, op1);
13823   pat = GEN_FCN (icode) (target, op0, op1);
13830 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE vector-compare builtin described by D: generates the
   mask-producing compare pattern with an extra rtx (OP2) encoding the
   comparison code.
   NOTE(review): extraction dropped the "static rtx" line, braces,
   declarations ("rtx pat, op2;" etc.), the tail of the operand-swap
   comment and its "if"/swap body, the target-reuse head, and the
   emit/return tail.  */
13833 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13837   tree arg0 = TREE_VALUE (arglist);
13838   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13839   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13840   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Modes the compare pattern wants for its result and two inputs.  */
13842   enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13843   enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13844   enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13845   enum rtx_code comparison = d->comparison;
/* Guard against const0_rtx standing in for a vector (error recovery).  */
13847   if (VECTOR_MODE_P (mode0))
13848     op0 = safe_vector_operand (op0, mode0);
13849   if (VECTOR_MODE_P (mode1))
13850     op1 = safe_vector_operand (op1, mode1);
13852   /* Swap operands if we have a comparison that isn't available in
/* NOTE(review): the rest of the comment, the guarding "if" and the
   op0/op1 exchange were dropped; TMP below holds the old op1.  */
13856       rtx tmp = gen_reg_rtx (mode1);
13857       emit_move_insn (tmp, op1);
/* NOTE(review): the "if (optimize || target == 0" head was dropped.  */
13863       || GET_MODE (target) != tmode
13864       || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13865     target = gen_reg_rtx (tmode);
13867   if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13868     op0 = copy_to_mode_reg (mode0, op0);
13869   if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13870     op1 = copy_to_mode_reg (mode1, op1);
/* OP2 carries the comparison code itself as a fourth pattern operand.  */
13872   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13873   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13880 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comi/ucomi builtin described by D: emits the flags-setting
   compare, then materializes the scalar 0/1 result by setting the low
   QImode part of an SImode pseudo from the flags comparison.
   NOTE(review): extraction dropped the "static rtx" line, braces,
   declarations, the rest of the operand-swap comment and its body,
   the "if (! pat) return 0; emit_insn (pat);" sequence before the
   flag-extraction SET, and part of that SET's operand list.  */
13883 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13887   tree arg0 = TREE_VALUE (arglist);
13888   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13889   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13890   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13892   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13893   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13894   enum rtx_code comparison = d->comparison;
/* Guard against const0_rtx standing in for a vector (error recovery).  */
13896   if (VECTOR_MODE_P (mode0))
13897     op0 = safe_vector_operand (op0, mode0);
13898   if (VECTOR_MODE_P (mode1))
13899     op1 = safe_vector_operand (op1, mode1);
13901   /* Swap operands if we have a comparison that isn't available in
/* Result lives in an SImode pseudo zeroed up front; TARGET is then
   narrowed to its QImode subreg so only the low byte is written.  */
13910   target = gen_reg_rtx (SImode);
13911   emit_move_insn (target, const0_rtx);
13912   target = gen_rtx_SUBREG (QImode, target, 0);
13914   if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13915     op0 = copy_to_mode_reg (mode0, op0);
13916   if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13917     op1 = copy_to_mode_reg (mode1, op1);
13919   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13920   pat = GEN_FCN (d->icode) (op0, op1);
/* Set only the low byte of TARGET from the flags comparison.  */
13924   emit_insn (gen_rtx_SET (VOIDmode,
13925 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13926 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode pseudo underlying the QImode subreg.  */
13930   return SUBREG_REG (target);
13933 /* Expand an expression EXP that calls a built-in function,
13934 with result going to TARGET if that's convenient
13935 (and in mode MODE if that's convenient).
13936 SUBTARGET may be used as the target for computing one of EXP's operands.
13937 IGNORE is nonzero if the value is to be ignored. */
13940 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13941 enum machine_mode mode ATTRIBUTE_UNUSED,
13942 int ignore ATTRIBUTE_UNUSED)
13944 const struct builtin_description *d;
13946 enum insn_code icode;
13947 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13948 tree arglist = TREE_OPERAND (exp, 1);
13949 tree arg0, arg1, arg2;
13950 rtx op0, op1, op2, pat;
13951 enum machine_mode tmode, mode0, mode1, mode2;
13952 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13956 case IX86_BUILTIN_EMMS:
13957 emit_insn (gen_emms ());
13960 case IX86_BUILTIN_SFENCE:
13961 emit_insn (gen_sfence ());
13964 case IX86_BUILTIN_PEXTRW:
13965 case IX86_BUILTIN_PEXTRW128:
13966 icode = (fcode == IX86_BUILTIN_PEXTRW
13967 ? CODE_FOR_mmx_pextrw
13968 : CODE_FOR_sse2_pextrw);
13969 arg0 = TREE_VALUE (arglist);
13970 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13971 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13972 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13973 tmode = insn_data[icode].operand[0].mode;
13974 mode0 = insn_data[icode].operand[1].mode;
13975 mode1 = insn_data[icode].operand[2].mode;
13977 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13978 op0 = copy_to_mode_reg (mode0, op0);
13979 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13981 error ("selector must be an integer constant in the range 0..%i",
13982 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13983 return gen_reg_rtx (tmode);
13986 || GET_MODE (target) != tmode
13987 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13988 target = gen_reg_rtx (tmode);
13989 pat = GEN_FCN (icode) (target, op0, op1);
13995 case IX86_BUILTIN_PINSRW:
13996 case IX86_BUILTIN_PINSRW128:
13997 icode = (fcode == IX86_BUILTIN_PINSRW
13998 ? CODE_FOR_mmx_pinsrw
13999 : CODE_FOR_sse2_pinsrw);
14000 arg0 = TREE_VALUE (arglist);
14001 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14002 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14003 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14004 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14005 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14006 tmode = insn_data[icode].operand[0].mode;
14007 mode0 = insn_data[icode].operand[1].mode;
14008 mode1 = insn_data[icode].operand[2].mode;
14009 mode2 = insn_data[icode].operand[3].mode;
14011 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14012 op0 = copy_to_mode_reg (mode0, op0);
14013 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14014 op1 = copy_to_mode_reg (mode1, op1);
14015 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14017 error ("selector must be an integer constant in the range 0..%i",
14018 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14022 || GET_MODE (target) != tmode
14023 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14024 target = gen_reg_rtx (tmode);
14025 pat = GEN_FCN (icode) (target, op0, op1, op2);
14031 case IX86_BUILTIN_MASKMOVQ:
14032 case IX86_BUILTIN_MASKMOVDQU:
14033 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14034 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14035 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14036 : CODE_FOR_sse2_maskmovdqu));
14037 /* Note the arg order is different from the operand order. */
14038 arg1 = TREE_VALUE (arglist);
14039 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14040 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14041 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14042 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14043 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14044 mode0 = insn_data[icode].operand[0].mode;
14045 mode1 = insn_data[icode].operand[1].mode;
14046 mode2 = insn_data[icode].operand[2].mode;
14048 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14049 op0 = copy_to_mode_reg (mode0, op0);
14050 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14051 op1 = copy_to_mode_reg (mode1, op1);
14052 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14053 op2 = copy_to_mode_reg (mode2, op2);
14054 pat = GEN_FCN (icode) (op0, op1, op2);
14060 case IX86_BUILTIN_SQRTSS:
14061 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14062 case IX86_BUILTIN_RSQRTSS:
14063 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14064 case IX86_BUILTIN_RCPSS:
14065 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14067 case IX86_BUILTIN_LOADAPS:
14068 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14070 case IX86_BUILTIN_LOADUPS:
14071 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14073 case IX86_BUILTIN_STOREAPS:
14074 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14076 case IX86_BUILTIN_STOREUPS:
14077 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14079 case IX86_BUILTIN_LOADSS:
14080 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14082 case IX86_BUILTIN_STORESS:
14083 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14085 case IX86_BUILTIN_LOADHPS:
14086 case IX86_BUILTIN_LOADLPS:
14087 case IX86_BUILTIN_LOADHPD:
14088 case IX86_BUILTIN_LOADLPD:
14089 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14090 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14091 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14092 : CODE_FOR_sse2_movlpd);
14093 arg0 = TREE_VALUE (arglist);
14094 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14095 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14096 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14097 tmode = insn_data[icode].operand[0].mode;
14098 mode0 = insn_data[icode].operand[1].mode;
14099 mode1 = insn_data[icode].operand[2].mode;
14101 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14102 op0 = copy_to_mode_reg (mode0, op0);
14103 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14105 || GET_MODE (target) != tmode
14106 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14107 target = gen_reg_rtx (tmode);
14108 pat = GEN_FCN (icode) (target, op0, op1);
14114 case IX86_BUILTIN_STOREHPS:
14115 case IX86_BUILTIN_STORELPS:
14116 case IX86_BUILTIN_STOREHPD:
14117 case IX86_BUILTIN_STORELPD:
14118 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14119 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14120 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14121 : CODE_FOR_sse2_movlpd);
14122 arg0 = TREE_VALUE (arglist);
14123 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14124 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14125 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14126 mode0 = insn_data[icode].operand[1].mode;
14127 mode1 = insn_data[icode].operand[2].mode;
14129 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14130 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14131 op1 = copy_to_mode_reg (mode1, op1);
14133 pat = GEN_FCN (icode) (op0, op0, op1);
14139 case IX86_BUILTIN_MOVNTPS:
14140 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14141 case IX86_BUILTIN_MOVNTQ:
14142 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14144 case IX86_BUILTIN_LDMXCSR:
14145 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14146 target = assign_386_stack_local (SImode, 0);
14147 emit_move_insn (target, op0);
14148 emit_insn (gen_ldmxcsr (target));
14151 case IX86_BUILTIN_STMXCSR:
14152 target = assign_386_stack_local (SImode, 0);
14153 emit_insn (gen_stmxcsr (target));
14154 return copy_to_mode_reg (SImode, target);
14156 case IX86_BUILTIN_SHUFPS:
14157 case IX86_BUILTIN_SHUFPD:
14158 icode = (fcode == IX86_BUILTIN_SHUFPS
14159 ? CODE_FOR_sse_shufps
14160 : CODE_FOR_sse2_shufpd);
14161 arg0 = TREE_VALUE (arglist);
14162 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14163 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14164 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14165 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14166 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14167 tmode = insn_data[icode].operand[0].mode;
14168 mode0 = insn_data[icode].operand[1].mode;
14169 mode1 = insn_data[icode].operand[2].mode;
14170 mode2 = insn_data[icode].operand[3].mode;
14172 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14173 op0 = copy_to_mode_reg (mode0, op0);
14174 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14175 op1 = copy_to_mode_reg (mode1, op1);
14176 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14178 /* @@@ better error message */
14179 error ("mask must be an immediate");
14180 return gen_reg_rtx (tmode);
14183 || GET_MODE (target) != tmode
14184 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14185 target = gen_reg_rtx (tmode);
14186 pat = GEN_FCN (icode) (target, op0, op1, op2);
14192 case IX86_BUILTIN_PSHUFW:
14193 case IX86_BUILTIN_PSHUFD:
14194 case IX86_BUILTIN_PSHUFHW:
14195 case IX86_BUILTIN_PSHUFLW:
14196 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14197 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14198 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14199 : CODE_FOR_mmx_pshufw);
14200 arg0 = TREE_VALUE (arglist);
14201 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14202 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14203 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14204 tmode = insn_data[icode].operand[0].mode;
14205 mode1 = insn_data[icode].operand[1].mode;
14206 mode2 = insn_data[icode].operand[2].mode;
14208 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14209 op0 = copy_to_mode_reg (mode1, op0);
14210 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14212 /* @@@ better error message */
14213 error ("mask must be an immediate");
14217 || GET_MODE (target) != tmode
14218 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14219 target = gen_reg_rtx (tmode);
14220 pat = GEN_FCN (icode) (target, op0, op1);
14226 case IX86_BUILTIN_PSLLDQI128:
14227 case IX86_BUILTIN_PSRLDQI128:
14228 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14229 : CODE_FOR_sse2_lshrti3);
14230 arg0 = TREE_VALUE (arglist);
14231 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14232 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14233 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14234 tmode = insn_data[icode].operand[0].mode;
14235 mode1 = insn_data[icode].operand[1].mode;
14236 mode2 = insn_data[icode].operand[2].mode;
14238 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14240 op0 = copy_to_reg (op0);
14241 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14243 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14245 error ("shift must be an immediate");
14248 target = gen_reg_rtx (V2DImode);
14249 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14255 case IX86_BUILTIN_FEMMS:
14256 emit_insn (gen_femms ());
14259 case IX86_BUILTIN_PAVGUSB:
14260 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14262 case IX86_BUILTIN_PF2ID:
14263 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14265 case IX86_BUILTIN_PFACC:
14266 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14268 case IX86_BUILTIN_PFADD:
14269 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14271 case IX86_BUILTIN_PFCMPEQ:
14272 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14274 case IX86_BUILTIN_PFCMPGE:
14275 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14277 case IX86_BUILTIN_PFCMPGT:
14278 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14280 case IX86_BUILTIN_PFMAX:
14281 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14283 case IX86_BUILTIN_PFMIN:
14284 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14286 case IX86_BUILTIN_PFMUL:
14287 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14289 case IX86_BUILTIN_PFRCP:
14290 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14292 case IX86_BUILTIN_PFRCPIT1:
14293 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14295 case IX86_BUILTIN_PFRCPIT2:
14296 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14298 case IX86_BUILTIN_PFRSQIT1:
14299 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14301 case IX86_BUILTIN_PFRSQRT:
14302 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14304 case IX86_BUILTIN_PFSUB:
14305 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14307 case IX86_BUILTIN_PFSUBR:
14308 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14310 case IX86_BUILTIN_PI2FD:
14311 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14313 case IX86_BUILTIN_PMULHRW:
14314 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14316 case IX86_BUILTIN_PF2IW:
14317 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14319 case IX86_BUILTIN_PFNACC:
14320 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14322 case IX86_BUILTIN_PFPNACC:
14323 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14325 case IX86_BUILTIN_PI2FW:
14326 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14328 case IX86_BUILTIN_PSWAPDSI:
14329 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14331 case IX86_BUILTIN_PSWAPDSF:
14332 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14334 case IX86_BUILTIN_SSE_ZERO:
14335 target = gen_reg_rtx (V4SFmode);
14336 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14339 case IX86_BUILTIN_MMX_ZERO:
14340 target = gen_reg_rtx (DImode);
14341 emit_insn (gen_mmx_clrdi (target));
14344 case IX86_BUILTIN_CLRTI:
14345 target = gen_reg_rtx (V2DImode);
14346 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14350 case IX86_BUILTIN_SQRTSD:
14351 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14352 case IX86_BUILTIN_LOADAPD:
14353 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14354 case IX86_BUILTIN_LOADUPD:
14355 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14357 case IX86_BUILTIN_STOREAPD:
14358 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14359 case IX86_BUILTIN_STOREUPD:
14360 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14362 case IX86_BUILTIN_LOADSD:
14363 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14365 case IX86_BUILTIN_STORESD:
14366 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14368 case IX86_BUILTIN_SETPD1:
14369 target = assign_386_stack_local (DFmode, 0);
14370 arg0 = TREE_VALUE (arglist);
14371 emit_move_insn (adjust_address (target, DFmode, 0),
14372 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14373 op0 = gen_reg_rtx (V2DFmode);
14374 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14375 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14378 case IX86_BUILTIN_SETPD:
14379 target = assign_386_stack_local (V2DFmode, 0);
14380 arg0 = TREE_VALUE (arglist);
14381 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14382 emit_move_insn (adjust_address (target, DFmode, 0),
14383 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14384 emit_move_insn (adjust_address (target, DFmode, 8),
14385 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14386 op0 = gen_reg_rtx (V2DFmode);
14387 emit_insn (gen_sse2_movapd (op0, target));
14390 case IX86_BUILTIN_LOADRPD:
14391 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14392 gen_reg_rtx (V2DFmode), 1);
14393 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14396 case IX86_BUILTIN_LOADPD1:
14397 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14398 gen_reg_rtx (V2DFmode), 1);
14399 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14402 case IX86_BUILTIN_STOREPD1:
14403 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14404 case IX86_BUILTIN_STORERPD:
14405 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14407 case IX86_BUILTIN_CLRPD:
14408 target = gen_reg_rtx (V2DFmode);
14409 emit_insn (gen_sse_clrv2df (target));
14412 case IX86_BUILTIN_MFENCE:
14413 emit_insn (gen_sse2_mfence ());
14415 case IX86_BUILTIN_LFENCE:
14416 emit_insn (gen_sse2_lfence ());
14419 case IX86_BUILTIN_CLFLUSH:
14420 arg0 = TREE_VALUE (arglist);
14421 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14422 icode = CODE_FOR_sse2_clflush;
14423 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14424 op0 = copy_to_mode_reg (Pmode, op0);
14426 emit_insn (gen_sse2_clflush (op0));
14429 case IX86_BUILTIN_MOVNTPD:
14430 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14431 case IX86_BUILTIN_MOVNTDQ:
14432 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14433 case IX86_BUILTIN_MOVNTI:
14434 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14436 case IX86_BUILTIN_LOADDQA:
14437 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14438 case IX86_BUILTIN_LOADDQU:
14439 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14440 case IX86_BUILTIN_LOADD:
14441 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14443 case IX86_BUILTIN_STOREDQA:
14444 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14445 case IX86_BUILTIN_STOREDQU:
14446 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14447 case IX86_BUILTIN_STORED:
14448 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14450 case IX86_BUILTIN_MONITOR:
14451 arg0 = TREE_VALUE (arglist);
14452 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14453 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14454 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14455 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14456 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14458 op0 = copy_to_mode_reg (SImode, op0);
14460 op1 = copy_to_mode_reg (SImode, op1);
14462 op2 = copy_to_mode_reg (SImode, op2);
14463 emit_insn (gen_monitor (op0, op1, op2));
14466 case IX86_BUILTIN_MWAIT:
14467 arg0 = TREE_VALUE (arglist);
14468 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14469 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14470 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14472 op0 = copy_to_mode_reg (SImode, op0);
14474 op1 = copy_to_mode_reg (SImode, op1);
14475 emit_insn (gen_mwait (op0, op1));
14478 case IX86_BUILTIN_LOADDDUP:
14479 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14481 case IX86_BUILTIN_LDDQU:
14482 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14489 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14490 if (d->code == fcode)
14492 /* Compares are treated specially. */
14493 if (d->icode == CODE_FOR_maskcmpv4sf3
14494 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14495 || d->icode == CODE_FOR_maskncmpv4sf3
14496 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14497 || d->icode == CODE_FOR_maskcmpv2df3
14498 || d->icode == CODE_FOR_vmmaskcmpv2df3
14499 || d->icode == CODE_FOR_maskncmpv2df3
14500 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14501 return ix86_expand_sse_compare (d, arglist, target);
14503 return ix86_expand_binop_builtin (d->icode, arglist, target);
14506 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14507 if (d->code == fcode)
14508 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14510 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14511 if (d->code == fcode)
14512 return ix86_expand_sse_comi (d, arglist, target);
14514 /* @@@ Should really do something sensible here. */
14518 /* Store OPERAND to the memory after reload is completed. This means
14519 that we can't easily use assign_stack_local. */
/* Returns a MEM rtx naming where OPERAND now lives.  Three strategies are
   visible: reuse the red zone below the stack pointer (64-bit ABI), push
   via PRE_DEC of the stack pointer (64-bit without red zone, and 32-bit
   DImode split into two SImode pushes), or push a single word.
   NOTE(review): this listing elides interior lines (gaps in the embedded
   numbering) -- several branch bodies and the final return are not visible;
   confirm details against the full source.  */
14521 ix86_force_to_memory (enum machine_mode mode, rtx operand)
/* Only valid after reload: we manipulate the hard stack pointer directly.  */
14524 if (!reload_completed)
14526 if (TARGET_RED_ZONE)
/* Red zone: store below the stack pointer without adjusting it.  */
14528 result = gen_rtx_MEM (mode,
14529 gen_rtx_PLUS (Pmode,
14531 GEN_INT (-RED_ZONE_SIZE)));
14532 emit_move_insn (result, operand);
14534 else if (!TARGET_RED_ZONE && TARGET_64BIT)
/* 64-bit, no red zone: widen to DImode and push (PRE_DEC of %rsp).  */
14540 operand = gen_lowpart (DImode, operand);
14544 gen_rtx_SET (VOIDmode,
14545 gen_rtx_MEM (DImode,
14546 gen_rtx_PRE_DEC (DImode,
14547 stack_pointer_rtx)),
14553 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode halves and push each one.  */
14562 split_di (&operand, 1, operands, operands + 1);
14564 gen_rtx_SET (VOIDmode,
14565 gen_rtx_MEM (SImode,
14566 gen_rtx_PRE_DEC (Pmode,
14567 stack_pointer_rtx)),
14570 gen_rtx_SET (VOIDmode,
14571 gen_rtx_MEM (SImode,
14572 gen_rtx_PRE_DEC (Pmode,
14573 stack_pointer_rtx)),
14578 /* It is better to store HImodes as SImodes. */
14579 if (!TARGET_PARTIAL_REG_STALL)
14580 operand = gen_lowpart (SImode, operand);
14584 gen_rtx_SET (VOIDmode,
14585 gen_rtx_MEM (GET_MODE (operand),
14586 gen_rtx_PRE_DEC (SImode,
14587 stack_pointer_rtx)),
14593 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14598 /* Free operand from the memory. */
/* Counterpart of ix86_force_to_memory: releases the stack space that was
   pushed (nothing to do when the red zone was used, since the stack pointer
   was never moved).  NOTE(review): the byte counts chosen per mode are on
   elided lines -- confirm against the full source.  */
14600 ix86_free_from_memory (enum machine_mode mode)
14602 if (!TARGET_RED_ZONE)
/* Pick the deallocation size from MODE (DImode/64-bit and stalled HImode
   cases are distinguished below; exact sizes elided in this listing).  */
14606 if (mode == DImode || TARGET_64BIT)
14608 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14612 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14613 to pop or add instruction if registers are available. */
14614 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14615 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14620 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14621 QImode must go into class Q_REGS.
14622 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14623 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS: given a value X being reloaded into
   CLASS, return the class actually to be used (possibly NO_REGS to force
   the constant pool).  NOTE(review): several return statements fall on
   elided lines of this listing; the visible conditions show only which
   cases are distinguished.  */
14625 ix86_preferred_reload_class (rtx x, enum reg_class class)
/* Non-zero vector constants cannot be materialized in registers directly.  */
14627 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14629 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14631 /* SSE can't load any constant directly yet. */
14632 if (SSE_CLASS_P (class))
14634 /* Floats can load 0 and 1. */
14635 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14637 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14638 if (MAYBE_SSE_CLASS_P (class))
14639 return (reg_class_subset_p (class, GENERAL_REGS)
14640 ? GENERAL_REGS : FLOAT_REGS);
14644 /* General regs can load everything. */
14645 if (reg_class_subset_p (class, GENERAL_REGS))
14646 return GENERAL_REGS;
14647 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14648 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot load constants either.  */
14651 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values must end up in a byte-addressable register (Q_REGS).  */
14653 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14658 /* If we are copying between general and FP registers, we need a memory
14659 location. The same is true for SSE and MMX registers.
14661 The macro can't work reliably when one of the CLASSES is class containing
14662 registers from multiple units (SSE, MMX, integer). We avoid this by never
14663 combining those units in single alternative in the machine description.
14664 Ensure that this constraint holds to avoid unexpected surprises.
14666 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14667 enforce these sanity checks. */
/* Returns nonzero when a CLASS1 -> CLASS2 copy of MODE must go through
   memory.  The first test below is the sanity check described above: each
   class must be unambiguously FP, SSE, MMX, or integer.  */
14669 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14670 enum machine_mode mode, int strict)
14672 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14673 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14674 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14675 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14676 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14677 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* FP <-> non-FP always needs memory; SSE/MMX <-> integer needs memory
   except for word-size moves when direct inter-unit moves are enabled.  */
14684 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14685 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14686 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14687 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14688 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14690 /* Return the cost of moving data from a register in class CLASS1 to
14691 one in class CLASS2.
14693 It is not required that the cost always equal 2 when FROM is the same as TO;
14694 on some machines it is expensive to move between registers if they are not
14695 general registers. */
/* NOTE(review): some statements (e.g. the returns closing the secondary-
   memory branch) sit on lines elided from this listing.  */
14697 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14698 enum reg_class class2)
14700 /* In case we require secondary memory, compute cost of the store followed
14701 by load. In order to avoid bad register allocation choices, we need
14702 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14704 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* Worst-case (max of load/store direction) cost for each side of the
   round trip through memory.  */
14708 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14709 MEMORY_MOVE_COST (mode, class1, 1));
14710 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14711 MEMORY_MOVE_COST (mode, class2, 1));
14713 /* In case of copying from general_purpose_register we may emit multiple
14714 stores followed by single load causing memory size mismatch stall.
14715 Count this as arbitrarily high cost of 20. */
14716 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14719 /* In the case of FP/MMX moves, the registers actually overlap, and we
14720 have to switch modes in order to treat them differently. */
14721 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14722 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14728 /* Moves between SSE/MMX and integer unit are expensive. */
14729 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14730 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14731 return ix86_cost->mmxsse_to_integer;
/* Intra-unit moves: pick the per-unit cost from the active cost table.  */
14732 if (MAYBE_FLOAT_CLASS_P (class1))
14733 return ix86_cost->fp_move;
14734 if (MAYBE_SSE_CLASS_P (class1))
14735 return ix86_cost->sse_move;
14736 if (MAYBE_MMX_CLASS_P (class1))
14737 return ix86_cost->mmx_move;
14741 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14743 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14745 /* Flags and only flags can only hold CCmode values. */
14746 if (CC_REGNO_P (regno))
14747 return GET_MODE_CLASS (mode) == MODE_CC;
/* CC / RANDOM / PARTIAL_INT modes are rejected for all other registers
   (the return on the elided line following this condition).  */
14748 if (GET_MODE_CLASS (mode) == MODE_CC
14749 || GET_MODE_CLASS (mode) == MODE_RANDOM
14750 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14752 if (FP_REGNO_P (regno))
14753 return VALID_FP_MODE_P (mode);
14754 if (SSE_REGNO_P (regno))
14755 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14756 if (MMX_REGNO_P (regno))
/* MMX registers additionally accept the 3DNow! vector modes.  */
14758 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14759 /* We handle both integer and floats in the general purpose registers.
14760 In future we should be able to handle vector modes as well. */
14761 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14763 /* Take care for QImode values - they can be in non-QI regs, but then
14764 they do cause partial register stalls. */
14765 if (regno < 4 || mode != QImode || TARGET_64BIT)
14767 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14770 /* Return the cost of moving data of mode M between a
14771 register and memory. A value of 2 is the default; this cost is
14772 relative to those in `REGISTER_MOVE_COST'.
14774 If moving between registers and memory is more expensive than
14775 between two registers, you should define this macro to express the
14778 Model also increased moving costs of QImode registers in non
/* IN nonzero means a load (memory -> register); zero means a store.
   NOTE(review): the switch cases that compute `index' from the mode size
   are largely on elided lines of this listing.  */
14782 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
/* x87 classes: index the fp_load/fp_store tables by operand size.  */
14784 if (FLOAT_CLASS_P (class))
14801 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: sse_load/sse_store tables.  */
14803 if (SSE_CLASS_P (class))
14806 switch (GET_MODE_SIZE (mode))
14820 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: mmx_load/mmx_store tables.  */
14822 if (MMX_CLASS_P (class))
14825 switch (GET_MODE_SIZE (mode))
14836 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes, keyed by operand size.  QImode in a non-Q register
   needs movzbl on load and pays a partial-store penalty (+4) on store.  */
14838 switch (GET_MODE_SIZE (mode))
14842 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14843 : ix86_cost->movzbl_load);
14845 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14846 : ix86_cost->int_store[0] + 4);
14849 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14851 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14852 if (mode == TFmode)
14854 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14855 * (((int) GET_MODE_SIZE (mode)
14856 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14860 /* Compute a (partial) cost for rtx X. Return true if the complete
14861 cost has been computed, and false if subexpressions should be
14862 scanned. In either case, *TOTAL contains the cost result. */
/* Target hook TARGET_RTX_COSTS.  Dispatches on CODE; the `case' labels and
   `break's are largely on lines elided from this listing.  */
14865 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14867 enum machine_mode mode = GET_MODE (x);
/* Constant operands: cheap unless they need extension or PIC fixups.  */
14875 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14877 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14879 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): `!GET_CODE (x) != LABEL_REF' parses as
   `(!GET_CODE (x)) != LABEL_REF', which is always true since no rtx code
   is both 0 and LABEL_REF -- almost certainly meant to be
   `GET_CODE (x) != LABEL_REF' (fixed in later GCC).  Cannot be corrected
   here because surrounding lines are elided from this listing.  */
14881 || (!GET_CODE (x) != LABEL_REF
14882 && (GET_CODE (x) != SYMBOL_REF
14883 || !SYMBOL_REF_LOCAL_P (x)))))
/* Floating-point constants: free if the 387 can synthesize them
   (fldz/fld1 etc.), otherwise a constant-pool load.  */
14890 if (mode == VOIDmode)
14893 switch (standard_80387_constant_p (x))
14898 default: /* Other constants */
14903 /* Start with (MEM (SYMBOL_REF)), since that's where
14904 it'll probably end up. Add a penalty for size. */
14905 *total = (COSTS_N_INSNS (1)
14906 + (flag_pic != 0 && !TARGET_64BIT)
14907 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14913 /* The zero extensions is often completely free on x86_64, so make
14914 it as cheap as possible. */
14915 if (TARGET_64BIT && mode == DImode
14916 && GET_MODE (XEXP (x, 0)) == SImode)
14918 else if (TARGET_ZERO_EXTEND_WITH_AND)
14919 *total = COSTS_N_INSNS (ix86_cost->add)
14921 *total = COSTS_N_INSNS (ix86_cost->movzx);
/* Sign extension always costs a movsx.  */
14925 *total = COSTS_N_INSNS (ix86_cost->movsx);
/* Shift by a constant: may be doable as add (shift by 1) or lea.  */
14929 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14930 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14932 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14935 *total = COSTS_N_INSNS (ix86_cost->add);
14938 if ((value == 2 || value == 3)
14939 && !TARGET_DECOMPOSE_LEA
14940 && ix86_cost->lea <= ix86_cost->shift_const)
14942 *total = COSTS_N_INSNS (ix86_cost->lea);
/* 32-bit DImode shifts are synthesized from two SImode shifts.  */
14952 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14954 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14956 if (INTVAL (XEXP (x, 1)) > 32)
14957 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14959 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14963 if (GET_CODE (XEXP (x, 1)) == AND)
14964 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14966 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14971 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14972 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14974 *total = COSTS_N_INSNS (ix86_cost->shift_var);
/* Multiply: FP uses fmul; integer-by-constant cost scales with the
   number of set bits in the multiplier.  */
14979 if (FLOAT_MODE_P (mode))
14980 *total = COSTS_N_INSNS (ix86_cost->fmul);
14981 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14983 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14986 for (nbits = 0; value != 0; value >>= 1)
14989 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14990 + nbits * ix86_cost->mult_bit);
14994 /* This is arbitrary */
14995 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14996 + 7 * ix86_cost->mult_bit);
/* Division/modulo.  */
15004 if (FLOAT_MODE_P (mode))
15005 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15007 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
/* PLUS: recognize the shapes lea can compute (base + index*scale + disp)
   and charge a single lea plus the operand costs.  */
15011 if (FLOAT_MODE_P (mode))
15012 *total = COSTS_N_INSNS (ix86_cost->fadd);
15013 else if (!TARGET_DECOMPOSE_LEA
15014 && GET_MODE_CLASS (mode) == MODE_INT
15015 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15017 if (GET_CODE (XEXP (x, 0)) == PLUS
15018 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15019 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15020 && CONSTANT_P (XEXP (x, 1)))
15022 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15023 if (val == 2 || val == 4 || val == 8)
15025 *total = COSTS_N_INSNS (ix86_cost->lea);
15026 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15027 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15029 *total += rtx_cost (XEXP (x, 1), outer_code);
15033 else if (GET_CODE (XEXP (x, 0)) == MULT
15034 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15036 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15037 if (val == 2 || val == 4 || val == 8)
15039 *total = COSTS_N_INSNS (ix86_cost->lea);
15040 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15041 *total += rtx_cost (XEXP (x, 1), outer_code);
15045 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15047 *total = COSTS_N_INSNS (ix86_cost->lea);
15048 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15049 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15050 *total += rtx_cost (XEXP (x, 1), outer_code);
/* MINUS (and PLUS fallthrough): FP add cost, then the generic
   add/sub handling shared with AND/IOR/XOR below.  */
15057 if (FLOAT_MODE_P (mode))
15059 *total = COSTS_N_INSNS (ix86_cost->fadd);
/* 32-bit DImode logicals/adds are done as two SImode operations.  */
15067 if (!TARGET_64BIT && mode == DImode)
15069 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15070 + (rtx_cost (XEXP (x, 0), outer_code)
15071 << (GET_MODE (XEXP (x, 0)) != DImode))
15072 + (rtx_cost (XEXP (x, 1), outer_code)
15073 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG: fchs for floats.  */
15079 if (FLOAT_MODE_P (mode))
15081 *total = COSTS_N_INSNS (ix86_cost->fchs);
/* NOT: two insns for 32-bit DImode, one otherwise.  */
15087 if (!TARGET_64BIT && mode == DImode)
15088 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15090 *total = COSTS_N_INSNS (ix86_cost->add);
/* FLOAT/ABS/SQRT and UNSPEC(TP) cases follow; bodies partly elided.  */
15094 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15099 if (FLOAT_MODE_P (mode))
15100 *total = COSTS_N_INSNS (ix86_cost->fabs);
15104 if (FLOAT_MODE_P (mode))
15105 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15109 if (XINT (x, 1) == UNSPEC_TP)
15118 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* SVR3 constructor hook: emits `pushl $<symbol>' so the .init machinery can
   later pop and call each registered constructor.  PRIORITY is ignored.  */
15120 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15123 fputs ("\tpushl $", asm_out_file);
15124 assemble_name (asm_out_file, XSTR (symbol, 0));
15125 fputc ('\n', asm_out_file);
/* Counter used to generate unique local labels (LPC$n, L<n>$lz) per stub.  */
15131 static int current_machopic_label_num;
15133 /* Given a symbol name and its associated stub, write out the
15134 definition of the stub. */
/* Darwin/Mach-O lazy symbol stub: emits the stub, the binder helper that
   tail-calls dyld_stub_binding_helper, and the lazy pointer slot.  The PIC
   variant (visible via the call/popl sequence) computes addresses relative
   to an LPC$ label; the non-PIC variant jumps through the lazy pointer
   directly.  Several branch/section lines are elided from this listing.  */
15137 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15139 unsigned int length;
15140 char *binder_name, *symbol_name, lazy_ptr_name[32];
15141 int label = ++current_machopic_label_num;
15143 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15144 symb = (*targetm.strip_name_encoding) (symb);
/* Derive the binder and symbol names into stack buffers (+32 slack for
   the decorations the GEN_* macros append).  */
15146 length = strlen (stub);
15147 binder_name = alloca (length + 32);
15148 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15150 length = strlen (symb);
15151 symbol_name = alloca (length + 32);
15152 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15154 sprintf (lazy_ptr_name, "L%d$lz", label);
15157 machopic_picsymbol_stub_section ();
15159 machopic_symbol_stub_section ();
15161 fprintf (file, "%s:\n", stub);
15162 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC: materialize the pc in %eax, load the lazy pointer, jump to it.  */
15166 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15167 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15168 fprintf (file, "\tjmp %%edx\n");
15171 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* Binder: push the lazy pointer's address and enter the dyld helper.  */
15173 fprintf (file, "%s:\n", binder_name);
15177 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15178 fprintf (file, "\tpushl %%eax\n");
15181 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15183 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* Lazy pointer slot, initially pointing at the binder.  */
15185 machopic_lazy_symbol_ptr_section ();
15186 fprintf (file, "%s:\n", lazy_ptr_name);
15187 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15188 fprintf (file, "\t.long %s\n", binder_name);
15190 #endif /* TARGET_MACHO */
15192 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: call-clobbered GPRs first, then call-saved GPRs,
   then x87 or SSE registers depending on whether the FPU or SSE unit is
   doing FP math, then MMX, with the remainder zero-filled for registers
   that are never allocated.  */
15195 x86_order_regs_for_local_alloc (void)
15200 /* First allocate the local general purpose registers. */
15201 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15202 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15203 reg_alloc_order [pos++] = i;
15205 /* Global general purpose registers. */
15206 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15207 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15208 reg_alloc_order [pos++] = i;
15210 /* x87 registers come first in case we are doing FP math
15212 if (!TARGET_SSE_MATH)
15213 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15214 reg_alloc_order [pos++] = i;
15216 /* SSE registers. */
15217 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15218 reg_alloc_order [pos++] = i;
15219 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15220 reg_alloc_order [pos++] = i;
15222 /* x87 registers. */
15223 if (TARGET_SSE_MATH)
15224 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15225 reg_alloc_order [pos++] = i;
15227 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15228 reg_alloc_order [pos++] = i;
15230 /* Initialize the rest of array as we do not allocate some registers
15232 while (pos < FIRST_PSEUDO_REGISTER)
15233 reg_alloc_order [pos++] = 0;
15236 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15237 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15240 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15241    struct attribute_spec.handler.  */
/* Validates attribute placement: the attribute is only meaningful on
   struct/union types, and "ms_struct"/"gcc_struct" are mutually
   exclusive.  On any violation a warning is issued and *NO_ADD_ATTRS
   is set so the attribute is discarded.
   NOTE(review): the return type, braces, the declaration of `type'
   and the else-arm that handles non-TYPE_DECL nodes are not visible
   in this view of the file.  */
15243 ix86_handle_struct_attribute (tree *node, tree name,
15244                               tree args ATTRIBUTE_UNUSED,
15245                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* When applied to a TYPE_DECL, look through it to the underlying type.  */
15248   if (DECL_P (*node))
15250       if (TREE_CODE (*node) == TYPE_DECL)
15251 	type = &TREE_TYPE (*node);
/* Reject anything that is not a struct or union type.  */
15256   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15257 		 || TREE_CODE (*type) == UNION_TYPE)))
15259       warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15260       *no_add_attrs = true;
/* Reject the attribute when the opposite one is already present.  */
15263   else if ((is_attribute_p ("ms_struct", name)
15264 	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15265 	   || ((is_attribute_p ("gcc_struct", name)
15266 		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15268       warning ("`%s' incompatible attribute ignored",
15269                IDENTIFIER_POINTER (name));
15270       *no_add_attrs = true;
/* Return nonzero when RECORD_TYPE should be laid out with MS-compatible
   bitfield rules: either MS layout is the target default and the type
   does not opt out via "gcc_struct", or the type opts in via "ms_struct".
   NOTE(review): the return type and braces are not visible in this view
   of the file.  */
15277 ix86_ms_bitfield_layout_p (tree record_type)
15279   return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15280 	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15281     || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15284 /* Returns an expression indicating where the this parameter is
15285    located on entry to the FUNCTION.  */
/* Returns an RTX (a REG or a stack MEM) for the `this' parameter.
   64-bit: a DImode argument register, skipping the first one when a
   hidden aggregate-return pointer occupies it.  32-bit with regparm
   or fastcall: an SImode register.  Otherwise: a stack slot past the
   return address (and past the hidden return pointer, if any).
   NOTE(review): the return type, braces, the 64-bit #if/guard lines
   and the regparm register-number selection are not visible in this
   view of the file.  */
15288 x86_this_parameter (tree function)
15290   tree type = TREE_TYPE (function);
/* 64-bit ABI: `this' is in the first free integer parameter register;
   n is 1 when a hidden aggregate-return pointer takes the first slot.  */
15294       int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15295       return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
/* 32-bit with register parameters: `this' arrives in a register.  */
15298   if (ix86_function_regparm (type, function) > 0)
15302       parm = TYPE_ARG_TYPES (type);
15303       /* Figure out whether or not the function has a variable number of
/* A prototype list terminated by void_type_node means no varargs.  */
15305       for (; parm; parm = TREE_CHAIN (parm))
15306 	if (TREE_VALUE (parm) == void_type_node)
15308 	  /* If not, the this parameter is in the first argument.  */
/* fastcall passes the first argument in %ecx rather than %eax;
   presumably `regno' is adjusted here — the assignment is not visible.  */
15312 	    if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15314 	    return gen_rtx_REG (SImode, regno);
/* Stack case: skip the hidden aggregate-return pointer when present.  */
15318   if (aggregate_value_p (TREE_TYPE (type), type))
15319     return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15321   return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15324 /* Determine whether x86_output_mi_thunk can succeed.  */
/* Target hook: return true when the thunk emitter below can handle this
   (delta, vcall_offset, function) combination.  The 32-bit path needs a
   scratch register, so it fails when all three regparm registers are
   taken and either a vcall offset or a PIC non-local call is required.
   NOTE(review): the return type, braces, the 64-bit early-return and
   the `return false' lines are not visible in this view of the file.  */
15327 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15328 			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15329 			 HOST_WIDE_INT vcall_offset, tree function)
15331   /* 64-bit can handle anything.  */
15335   /* For 32-bit, everything's fine if we have one free register.  */
15336   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15339   /* Need a free register for vcall_offset.  */
15343   /* Need a free register for GOT references.  */
15344   if (flag_pic && !(*targetm.binds_local_p) (function))
15347   /* Otherwise ok.  */
15351 /* Output the assembler code for a thunk function.  THUNK_DECL is the
15352    declaration for the thunk function itself, FUNCTION is the decl for
15353    the target function.  DELTA is an immediate constant offset to be
15354    added to THIS.  If VCALL_OFFSET is nonzero, the word at
15355    *(*this + vcall_offset) should be added to THIS.  */
/* Implements the TARGET_ASM_OUTPUT_MI_THUNK hook for IA-32/x86-64:
   emit `this += delta', optionally `this += *(*this + vcall_offset)',
   then a tail jump to FUNCTION (direct, via GOT, or via Mach-O stub).
   NOTE(review): many lines (declarations of xops/tmp/this_reg, braces,
   #if TARGET_64BIT / TARGET_MACHO conditionals and several else-arms)
   are not visible in this view of the file; the comments below describe
   only what the visible statements do.  */
15358 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15359 		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15360 		     HOST_WIDE_INT vcall_offset, tree function)
/* Where `this' arrives: a register or a stack slot (see above).  */
15363   rtx this = x86_this_parameter (function);
15366   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
15367      pull it in now and let DELTA benefit.  */
15370   else if (vcall_offset)
15372       /* Put the this parameter into %eax.  */
15374       xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15375       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* `this' is already in a register, or no vcall adjustment is needed.  */
15378     this_reg = NULL_RTX;
15380   /* Adjust the this parameter by a fixed constant.  */
15383       xops[0] = GEN_INT (delta);
15384       xops[1] = this_reg ? this_reg : this;
/* 64-bit: a delta that does not fit an immediate must go through a
   scratch; r10 is free at function entry (not a parameter register).  */
15387 	  if (!x86_64_general_operand (xops[0], DImode))
15389 	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15391 	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15395 	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
/* 32-bit variant of the delta addition.  */
15398 	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15401   /* Adjust the this parameter by a value stored in the vtable.  */
/* Pick a scratch for the vtable pointer: r10 on 64-bit; on 32-bit
   %ecx, or %eax when fastcall already claims %ecx for arguments.  */
15405 	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15408 	  int tmp_regno = 2 /* ECX */;
15409 	  if (lookup_attribute ("fastcall",
15410 	      TYPE_ATTRIBUTES (TREE_TYPE (function))))
15411 	    tmp_regno = 0 /* EAX */;
15412 	  tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer: tmp = *this.  */
15415       xops[0] = gen_rtx_MEM (Pmode, this_reg);
15418 	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15420 	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15422       /* Adjust the this parameter.  */
/* this += *(vtable + vcall_offset); on 64-bit an offset too large for
   a displacement is materialized in r11 first.  */
15423       xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15424       if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15426 	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15427 	  xops[0] = GEN_INT (vcall_offset);
15429 	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15430 	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15432       xops[1] = this_reg;
15434 	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15436 	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15439   /* If necessary, drop THIS back to its stack slot.  */
/* `this' lived on the stack and was pulled into %eax; write it back.  */
15440   if (this_reg && this_reg != this)
15442       xops[0] = this_reg;
15444       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Tail-jump to the real function.  */
15447   xops[0] = XEXP (DECL_RTL (function), 0);
/* 64-bit: direct jump when the target binds locally, else via GOT
   (RIP-relative GOTPCREL memory jump).  */
15450       if (!flag_pic || (*targetm.binds_local_p) (function))
15451 	output_asm_insn ("jmp\t%P0", xops);
15454 	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15455 	  tmp = gen_rtx_CONST (Pmode, tmp);
15456 	  tmp = gen_rtx_MEM (QImode, tmp);
15458 	  output_asm_insn ("jmp\t%A0", xops);
/* 32-bit: direct jump when local; on Darwin, jump through the
   machopic symbol stub instead of the GOT.  */
15463       if (!flag_pic || (*targetm.binds_local_p) (function))
15464 	output_asm_insn ("jmp\t%P0", xops);
15469 	    char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15470 	    tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15471 	    tmp = gen_rtx_MEM (QImode, tmp);
15473 	    output_asm_insn ("jmp\t%0", xops);
15476 #endif /* TARGET_MACHO */
/* 32-bit PIC non-Darwin: set up the GOT base in %ecx (free here, since
   regparm < 3 was verified by x86_can_output_mi_thunk) and do an
   indirect jump through the function's GOT entry.  */
15478 	    tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15479 	    output_set_got (tmp);
15482 	    output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15483 	    output_asm_insn ("jmp\t{*}%1", xops);
/* Target hook: emit the assembly-file preamble.  After the generic
   preamble, optionally emit a .version directive, the __fltused
   global (marks use of floating point for some linkers), and switch
   the assembler to Intel syntax when -masm=intel is in effect.
   NOTE(review): the return type and braces are not visible in this
   view of the file.  */
15489 x86_file_start (void)
15491   default_file_start ();
15492   if (X86_FILE_START_VERSION_DIRECTIVE)
15493     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15494   if (X86_FILE_START_FLTUSED)
15495     fputs ("\t.global\t__fltused\n", asm_out_file);
15496   if (ix86_asm_dialect == ASM_INTEL)
15497     fputs ("\t.intel_syntax\n", asm_out_file);
/* Return the alignment (in bits) to use for FIELD, given the alignment
   COMPUTED by generic code.  On 32-bit targets without -malign-double,
   integer and double-precision fields are capped at 32-bit alignment
   to match the traditional i386 ABI.
   NOTE(review): the return type, braces and the early/final `return
   computed' lines are not visible in this view of the file.  */
15501 x86_field_alignment (tree field, int computed)
15503   enum machine_mode mode;
15504   tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the natural (computed) alignment.  */
15506   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, classify by the element type.  */
15508   mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15509 		    ? get_inner_array_type (type) : type);
15510   if (mode == DFmode || mode == DCmode
15511       || GET_MODE_CLASS (mode) == MODE_INT
15512       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15513     return MIN (32, computed);
15517 /* Output assembler code to FILE to increment profiler label # LABELNO
15518    for profiling a function entry.  */
/* Emits the mcount call sequence in one of four flavors: 64-bit
   PIC/non-PIC and 32-bit PIC/non-PIC, each with an optional
   profile-counter setup when NO_PROFILE_COUNTERS is not defined.
   NOTE(review): the return type, braces and the #if/#else/#endif
   lines selecting among the four variants are not visible in this
   view of the file.  */
15520 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit PIC: counter address RIP-relative, mcount called via GOT.  */
15525 #ifndef NO_PROFILE_COUNTERS
15526       fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15528       fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
/* 64-bit non-PIC: absolute counter address, direct call.  */
15532 #ifndef NO_PROFILE_COUNTERS
15533       fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15535       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC: counter via GOTOFF(%ebx), mcount via GOT(%ebx).  */
15539 #ifndef NO_PROFILE_COUNTERS
15540       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15541 	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15543       fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC: absolute counter address, direct call.  */
15547 #ifndef NO_PROFILE_COUNTERS
15548       fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15549 	       PROFILE_COUNT_REGISTER);
15551       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15555 /* We don't have exact information about the insn sizes, but we may assume
15556    quite safely that we are informed about all 1 byte insns and memory
15557    address sizes.  This is enough to eliminate unnecessary padding in
/* Return a conservative lower bound (in bytes) on INSN's encoded size,
   used by k8_avoid_jump_misspredicts below to reason about 16-byte
   fetch windows.  Underestimating is safe; overestimating is not.
   NOTE(review): the return type, braces and the individual `return N'
   lines for each case are not visible in this view of the file.  */
15561 min_insn_size (rtx insn)
/* Notes, barriers, deleted insns etc. occupy no bytes.  */
15565   if (!INSN_P (insn) || !active_insn_p (insn))
15568   /* Discard alignments we've emit and jump instructions.  */
/* Our own align unspecs and dispatch tables are not counted.  */
15569   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15570       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15572   if (GET_CODE (insn) == JUMP_INSN
15573       && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15574 	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15577   /* Important case - calls are always 5 bytes.
15578      It is common to have many calls in the row.  */
15579   if (GET_CODE (insn) == CALL_INSN
15580       && symbolic_reference_mentioned_p (PATTERN (insn))
15581       && !SIBLING_CALL_P (insn))
/* Length attribute of 1 is considered exact for 1-byte insns.  */
15583   if (get_attr_length (insn) <= 1)
15586   /* For normal instructions we may rely on the sizes of addresses
15587      and the presence of symbol to require 4 bytes of encoding.
15588      This is not the case for jumps where references are PC relative.  */
15589   if (GET_CODE (insn) != JUMP_INSN)
15591       l = get_attr_length_address (insn);
/* A symbolic reference needs a full 4-byte displacement/immediate.  */
15592       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15601 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Machine-dependent reorg pass: slide a window [START, INSN] over the
   insn stream; whenever a 4th branch would land in the same 16-byte
   code window as three earlier ones, emit a p2align before it so the
   window boundary is forced between them.  Sizes come from
   min_insn_size, so all byte counts are conservative lower bounds.
   NOTE(review): the return type, braces, the declaration of `isjump'
   and the window-advance loop condition are not visible in this view
   of the file.  */
15605 k8_avoid_jump_misspredicts (void)
15607   rtx insn, start = get_insns ();
15608   int nbytes = 0, njumps = 0;
15611   /* Look for all minimal intervals of instructions containing 4 jumps.
15612      The intervals are bounded by START and INSN.  NBYTES is the total
15613      size of instructions in the interval including INSN and not including
15614      START.  When the NBYTES is smaller than 16 bytes, it is possible
15615      that the end of START and INSN ends up in the same 16byte page.
15617      The smallest offset in the page INSN can start is the case where START
15618      ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
15619      We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15621   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
/* Grow the window by INSN; count it if it is a branch or call.  */
15624       nbytes += min_insn_size (insn);
15626 	fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15627 		INSN_UID (insn), min_insn_size (insn));
15628       if ((GET_CODE (insn) == JUMP_INSN
15629 	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
15630 	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15631 	  || GET_CODE (insn) == CALL_INSN)
/* Shrink the window from the front until it holds at most 3 jumps.  */
15638 	  start = NEXT_INSN (start);
15639 	  if ((GET_CODE (start) == JUMP_INSN
15640 	       && GET_CODE (PATTERN (start)) != ADDR_VEC
15641 	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15642 	      || GET_CODE (start) == CALL_INSN)
15643 	    njumps--, isjump = 1;
15646 	  nbytes -= min_insn_size (start);
15651 	fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15652 		INSN_UID (start), INSN_UID (insn), nbytes);
/* 3 prior jumps, a jump just left the window, and everything could
   still fit in one 16-byte window: pad before INSN.  */
15654       if (njumps == 3 && isjump && nbytes < 16)
15656 	  int padsize = 15 - nbytes + min_insn_size (insn);
15659 	    fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15660           emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15665 /* Implement machine specific optimizations.
15666    At the moment we implement single transformation: AMD Athlon works faster
15667    when RET is not destination of conditional jump or directly preceded
15668    by other jump instruction.  We avoid the penalty by inserting NOP just
15669    before the RET instructions in such cases.  */
/* Machine-dependent reorg entry point (presumably ix86_reorg — the
   signature line is not visible in this view of the file, nor are the
   braces, the declarations of `e'/`ret'/`prev', the early return, the
   `replace = true' assignments, or the delete of the replaced RET).
   Walks every predecessor edge of the exit block; when a hot RET is
   reached straight from a label, a conditional jump, or a call, it is
   replaced by the longer return_internal_long form to dodge the
   Athlon/K8 branch-predictor penalty.  Finally runs the K8 jump-density
   pass above.  */
15675   if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15677   for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15679       basic_block bb = e->src;
15682       bool replace = false;
/* Only plain RETURN patterns in blocks considered hot are candidates.  */
15684       if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15685 	  || !maybe_hot_bb_p (bb))
/* Find the nearest preceding active insn or label.  */
15687       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15688 	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
/* RET right after a label: penalized when some non-fallthru edge
   with nonzero frequency jumps to it.  */
15690       if (prev && GET_CODE (prev) == CODE_LABEL)
15693 	  for (e = bb->pred; e; e = e->pred_next)
15694 	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
15695 		&& !(e->flags & EDGE_FALLTHRU))
/* RET directly preceded by a conditional jump or a call.  */
15700 	  prev = prev_active_insn (ret);
15702 	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15703 		  || GET_CODE (prev) == CALL_INSN))
15705 	  /* Empty functions get branch mispredict even when the jump destination
15706 	     is not visible to us.  */
15707 	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Emit the padded long return in place of the plain RET.  */
15712 	  emit_insn_before (gen_return_internal_long (), ret);
15716   k8_avoid_jump_misspredicts ();
15719 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scan INSN's extracted operands; any QImode hard register numbered 4
   or above (i.e. not %al/%bl/%cl/%dl) needs a REX prefix in 64-bit
   mode.  NOTE(review): the return type, braces, the loop-local `i'
   declaration, the QImode check on the operand, and the return
   statements are not visible in this view of the file.  */
15722 x86_extended_QIreg_mentioned_p (rtx insn)
15725   extract_insn_cached (insn);
15726   for (i = 0; i < recog_data.n_operands; i++)
15727     if (REG_P (recog_data.operand[i])
15728 	&& REGNO (recog_data.operand[i]) >= 4)
15733 /* Return nonzero when P points to register encoded via REX prefix.
15734    Called via for_each_rtx.  */
/* for_each_rtx callback: nonzero for a REG rtx whose hard register is
   one of the REX-only r8-r15 or xmm8-xmm15.
   NOTE(review): the return type, braces and the non-REG early return
   are not visible in this view of the file.  */
15736 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15738   unsigned int regno;
15741   regno = REGNO (*p);
15742   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15745 /* Return true when INSN mentions register that must be encoded using REX
/* Walk INSN's whole pattern with the callback above; nonzero iff any
   REX-only register appears anywhere in it.
   NOTE(review): the return type and braces are not visible in this
   view of the file.  */
15748 x86_extended_reg_mentioned_p (rtx insn)
15750   return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15753 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
15754    optabs would emit if we didn't have TFmode patterns.  */
/* operands[0] = FP destination, operands[1] = unsigned integer source.
   Nonnegative inputs use the plain signed FLOAT conversion; negative
   (high-bit-set) inputs are halved with the lost low bit ORed back in
   ((x >> 1) | (x & 1)), converted signed, then doubled — preserving
   correct rounding for the full unsigned range.
   NOTE(review): the return type, braces, the abort() for bad modes and
   the `out = operands[0]' assignment are not visible in this view of
   the file.  */
15757 x86_emit_floatuns (rtx operands[2])
15759   rtx neglab, donelab, i0, i1, f0, in, out;
15760   enum machine_mode mode, inmode;
15762   inmode = GET_MODE (operands[1]);
/* Only SImode and DImode sources are supported.  */
15763   if (inmode != SImode
15764       && inmode != DImode)
15768   in = force_reg (inmode, operands[1]);
15769   mode = GET_MODE (out);
15770   neglab = gen_label_rtx ();
15771   donelab = gen_label_rtx ();
15772   i1 = gen_reg_rtx (Pmode);
15773   f0 = gen_reg_rtx (mode);
/* Branch to the fixup path when the value is negative as signed.  */
15775   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
/* Fast path: value fits in the signed range, convert directly.  */
15777   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15778   emit_jump_insn (gen_jump (donelab));
15781   emit_label (neglab);
/* Fixup: out = 2 * float((in >> 1) | (in & 1)).  */
15783   i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15784   i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15785   i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15786   expand_float (f0, i0, 0);
15787   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15789   emit_label (donelab);
15792 /* Return if we do not know how to pass TYPE solely in registers.  */
/* True when the generic test says so, or — on 32-bit only — for TImode
   aggregates, which the 32-bit ABI cannot pass in registers.
   NOTE(review): the return type, braces and the `return true' after
   the generic check are not visible in this view of the file.  */
15794 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15796   if (default_must_pass_in_stack (mode, type))
15798   return (!TARGET_64BIT && type && mode == TImode);
15801 #include "gt-i386.h"