1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
730 enum cmodel ix86_cmodel;
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
796 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
797 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
799 static rtx get_thread_pointer (int);
800 static rtx legitimize_tls_address (rtx, enum tls_model, int);
801 static void get_pc_thunk_name (char [32], unsigned int);
802 static rtx gen_push (rtx);
803 static int memory_address_length (rtx addr);
804 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
805 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
806 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
807 static void ix86_dump_ppro_packet (FILE *);
808 static void ix86_reorder_insn (rtx *, rtx *);
809 static struct machine_function * ix86_init_machine_status (void);
810 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
811 static int ix86_nsaved_regs (void);
812 static void ix86_emit_save_regs (void);
813 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
814 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
815 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
816 static void ix86_sched_reorder_ppro (rtx *, rtx *);
817 static HOST_WIDE_INT ix86_GOT_alias_set (void);
818 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
819 static rtx ix86_expand_aligntest (rtx, int);
820 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
821 static int ix86_issue_rate (void);
822 static int ix86_adjust_cost (rtx, rtx, rtx, int);
823 static void ix86_sched_init (FILE *, int, int);
824 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
825 static int ix86_variable_issue (FILE *, int, rtx, int);
826 static int ia32_use_dfa_pipeline_interface (void);
827 static int ia32_multipass_dfa_lookahead (void);
828 static void ix86_init_mmx_sse_builtins (void);
829 static rtx x86_this_parameter (tree);
830 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree);
832 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
833 static void x86_file_start (void);
834 static void ix86_reorg (void);
835 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
836 static tree ix86_build_builtin_va_list (void);
840 rtx base, index, disp;
842 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
845 static int ix86_decompose_address (rtx, struct ix86_address *);
846 static int ix86_address_cost (rtx);
847 static bool ix86_cannot_force_const_mem (rtx);
848 static rtx ix86_delegitimize_address (rtx);
850 struct builtin_description;
851 static rtx ix86_expand_sse_comi (const struct builtin_description *,
853 static rtx ix86_expand_sse_compare (const struct builtin_description *,
855 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
856 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
857 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
858 static rtx ix86_expand_store_builtin (enum insn_code, tree);
859 static rtx safe_vector_operand (rtx, enum machine_mode);
860 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
861 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
862 enum rtx_code *, enum rtx_code *);
863 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
864 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
865 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
866 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
867 static int ix86_fp_comparison_cost (enum rtx_code code);
868 static unsigned int ix86_select_alt_pic_regnum (void);
869 static int ix86_save_reg (unsigned int, int);
870 static void ix86_compute_frame_layout (struct ix86_frame *);
871 static int ix86_comp_type_attributes (tree, tree);
872 static int ix86_function_regparm (tree, tree);
873 const struct attribute_spec ix86_attribute_table[];
874 static bool ix86_function_ok_for_sibcall (tree, tree);
875 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
876 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
877 static int ix86_value_regno (enum machine_mode);
878 static bool contains_128bit_aligned_vector_p (tree);
879 static bool ix86_ms_bitfield_layout_p (tree);
880 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
881 static int extended_reg_mentioned_1 (rtx *, void *);
882 static bool ix86_rtx_costs (rtx, int, int, int *);
883 static int min_insn_size (rtx);
884 static void k8_avoid_jump_misspredicts (void);
886 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
887 static void ix86_svr3_asm_out_constructor (rtx, int);
890 /* Register class used for passing given 64bit part of the argument.
891 These represent classes as documented by the PS ABI, with the exception
892 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
893 use SF or DFmode move instead of DImode to avoid reformatting penalties.
895 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
896 whenever possible (upper half does contain padding).
898 enum x86_64_reg_class
901 X86_64_INTEGER_CLASS,
902 X86_64_INTEGERSI_CLASS,
911 static const char * const x86_64_reg_class_name[] =
912 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
914 #define MAX_CLASSES 4
915 static int classify_argument (enum machine_mode, tree,
916 enum x86_64_reg_class [MAX_CLASSES], int);
917 static int examine_argument (enum machine_mode, tree, int, int *, int *);
918 static rtx construct_container (enum machine_mode, tree, int, int, int,
920 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
921 enum x86_64_reg_class);
923 /* Table of constants used by fldpi, fldln2, etc.... */
924 static REAL_VALUE_TYPE ext_80387_constants_table [5];
925 static bool ext_80387_constants_init = 0;
926 static void init_ext_80387_constants (void);
928 /* Initialize the GCC target structure. */
929 #undef TARGET_ATTRIBUTE_TABLE
930 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
931 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
932 # undef TARGET_MERGE_DECL_ATTRIBUTES
933 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
936 #undef TARGET_COMP_TYPE_ATTRIBUTES
937 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
939 #undef TARGET_INIT_BUILTINS
940 #define TARGET_INIT_BUILTINS ix86_init_builtins
942 #undef TARGET_EXPAND_BUILTIN
943 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
945 #undef TARGET_ASM_FUNCTION_EPILOGUE
946 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
948 #undef TARGET_ASM_OPEN_PAREN
949 #define TARGET_ASM_OPEN_PAREN ""
950 #undef TARGET_ASM_CLOSE_PAREN
951 #define TARGET_ASM_CLOSE_PAREN ""
953 #undef TARGET_ASM_ALIGNED_HI_OP
954 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
955 #undef TARGET_ASM_ALIGNED_SI_OP
956 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
958 #undef TARGET_ASM_ALIGNED_DI_OP
959 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
962 #undef TARGET_ASM_UNALIGNED_HI_OP
963 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
964 #undef TARGET_ASM_UNALIGNED_SI_OP
965 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
966 #undef TARGET_ASM_UNALIGNED_DI_OP
967 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
969 #undef TARGET_SCHED_ADJUST_COST
970 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
971 #undef TARGET_SCHED_ISSUE_RATE
972 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
973 #undef TARGET_SCHED_VARIABLE_ISSUE
974 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
975 #undef TARGET_SCHED_INIT
976 #define TARGET_SCHED_INIT ix86_sched_init
977 #undef TARGET_SCHED_REORDER
978 #define TARGET_SCHED_REORDER ix86_sched_reorder
979 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
980 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
981 ia32_use_dfa_pipeline_interface
982 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
983 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
984 ia32_multipass_dfa_lookahead
986 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
987 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
990 #undef TARGET_HAVE_TLS
991 #define TARGET_HAVE_TLS true
993 #undef TARGET_CANNOT_FORCE_CONST_MEM
994 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
996 #undef TARGET_DELEGITIMIZE_ADDRESS
997 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
999 #undef TARGET_MS_BITFIELD_LAYOUT_P
1000 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1002 #undef TARGET_ASM_OUTPUT_MI_THUNK
1003 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1004 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1005 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1007 #undef TARGET_ASM_FILE_START
1008 #define TARGET_ASM_FILE_START x86_file_start
1010 #undef TARGET_RTX_COSTS
1011 #define TARGET_RTX_COSTS ix86_rtx_costs
1012 #undef TARGET_ADDRESS_COST
1013 #define TARGET_ADDRESS_COST ix86_address_cost
1015 #undef TARGET_FIXED_CONDITION_CODE_REGS
1016 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1017 #undef TARGET_CC_MODES_COMPATIBLE
1018 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1020 #undef TARGET_MACHINE_DEPENDENT_REORG
1021 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1023 #undef TARGET_BUILD_BUILTIN_VA_LIST
1024 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1026 #undef TARGET_PROMOTE_PROTOTYPES
1027 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1029 #undef TARGET_STRUCT_VALUE_RTX
1030 #define TARGET_STRUCT_VALUE_RTX hook_rtx_tree_int_null
1032 struct gcc_target targetm = TARGET_INITIALIZER;
1034 /* The svr4 ABI for the i386 says that records and unions are returned
1036 #ifndef DEFAULT_PCC_STRUCT_RETURN
1037 #define DEFAULT_PCC_STRUCT_RETURN 1
1040 /* Sometimes certain combinations of command options do not make
1041 sense on a particular target machine. You can define a macro
1042 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1043 defined, is executed once just after all the command options have
1046 Don't use this macro to turn on various extra optimizations for
1047 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1050 override_options (void)
1053 /* Comes from final.c -- no real reason to change it. */
1054 #define MAX_CODE_ALIGN 16
1058 const struct processor_costs *cost; /* Processor costs */
1059 const int target_enable; /* Target flags to enable. */
1060 const int target_disable; /* Target flags to disable. */
1061 const int align_loop; /* Default alignments. */
1062 const int align_loop_max_skip;
1063 const int align_jump;
1064 const int align_jump_max_skip;
1065 const int align_func;
1067 const processor_target_table[PROCESSOR_max] =
1069 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1070 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1071 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1072 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1073 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1074 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1075 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1076 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1079 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1082 const char *const name; /* processor name or nickname. */
1083 const enum processor_type processor;
1084 const enum pta_flags
1089 PTA_PREFETCH_SSE = 8,
1095 const processor_alias_table[] =
1097 {"i386", PROCESSOR_I386, 0},
1098 {"i486", PROCESSOR_I486, 0},
1099 {"i586", PROCESSOR_PENTIUM, 0},
1100 {"pentium", PROCESSOR_PENTIUM, 0},
1101 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1102 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1103 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1104 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1105 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1106 {"i686", PROCESSOR_PENTIUMPRO, 0},
1107 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1108 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1109 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1110 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1111 PTA_MMX | PTA_PREFETCH_SSE},
1112 {"k6", PROCESSOR_K6, PTA_MMX},
1113 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1114 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1115 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1117 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1118 | PTA_3DNOW | PTA_3DNOW_A},
1119 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1120 | PTA_3DNOW_A | PTA_SSE},
1121 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1122 | PTA_3DNOW_A | PTA_SSE},
1123 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1124 | PTA_3DNOW_A | PTA_SSE},
1125 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1126 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1127 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1128 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1129 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1130 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1131 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1132 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1135 int const pta_size = ARRAY_SIZE (processor_alias_table);
1137 /* Set the default values for switches whose default depends on TARGET_64BIT
1138 in case they weren't overwritten by command line options. */
1141 if (flag_omit_frame_pointer == 2)
1142 flag_omit_frame_pointer = 1;
1143 if (flag_asynchronous_unwind_tables == 2)
1144 flag_asynchronous_unwind_tables = 1;
1145 if (flag_pcc_struct_return == 2)
1146 flag_pcc_struct_return = 0;
1150 if (flag_omit_frame_pointer == 2)
1151 flag_omit_frame_pointer = 0;
1152 if (flag_asynchronous_unwind_tables == 2)
1153 flag_asynchronous_unwind_tables = 0;
1154 if (flag_pcc_struct_return == 2)
1155 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1158 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1159 SUBTARGET_OVERRIDE_OPTIONS;
1162 if (!ix86_tune_string && ix86_arch_string)
1163 ix86_tune_string = ix86_arch_string;
1164 if (!ix86_tune_string)
1165 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1166 if (!ix86_arch_string)
1167 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1169 if (ix86_cmodel_string != 0)
1171 if (!strcmp (ix86_cmodel_string, "small"))
1172 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1174 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1175 else if (!strcmp (ix86_cmodel_string, "32"))
1176 ix86_cmodel = CM_32;
1177 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1178 ix86_cmodel = CM_KERNEL;
1179 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1180 ix86_cmodel = CM_MEDIUM;
1181 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1182 ix86_cmodel = CM_LARGE;
1184 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1188 ix86_cmodel = CM_32;
1190 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1192 if (ix86_asm_string != 0)
1194 if (!strcmp (ix86_asm_string, "intel"))
1195 ix86_asm_dialect = ASM_INTEL;
1196 else if (!strcmp (ix86_asm_string, "att"))
1197 ix86_asm_dialect = ASM_ATT;
1199 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1201 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1202 error ("code model `%s' not supported in the %s bit mode",
1203 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1204 if (ix86_cmodel == CM_LARGE)
1205 sorry ("code model `large' not supported yet");
1206 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1207 sorry ("%i-bit mode not compiled in",
1208 (target_flags & MASK_64BIT) ? 64 : 32);
1210 for (i = 0; i < pta_size; i++)
1211 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1213 ix86_arch = processor_alias_table[i].processor;
1214 /* Default cpu tuning to the architecture. */
1215 ix86_tune = ix86_arch;
1216 if (processor_alias_table[i].flags & PTA_MMX
1217 && !(target_flags_explicit & MASK_MMX))
1218 target_flags |= MASK_MMX;
1219 if (processor_alias_table[i].flags & PTA_3DNOW
1220 && !(target_flags_explicit & MASK_3DNOW))
1221 target_flags |= MASK_3DNOW;
1222 if (processor_alias_table[i].flags & PTA_3DNOW_A
1223 && !(target_flags_explicit & MASK_3DNOW_A))
1224 target_flags |= MASK_3DNOW_A;
1225 if (processor_alias_table[i].flags & PTA_SSE
1226 && !(target_flags_explicit & MASK_SSE))
1227 target_flags |= MASK_SSE;
1228 if (processor_alias_table[i].flags & PTA_SSE2
1229 && !(target_flags_explicit & MASK_SSE2))
1230 target_flags |= MASK_SSE2;
1231 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1232 x86_prefetch_sse = true;
1233 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1234 error ("CPU you selected does not support x86-64 instruction set");
1239 error ("bad value (%s) for -march= switch", ix86_arch_string);
1241 for (i = 0; i < pta_size; i++)
1242 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1244 ix86_tune = processor_alias_table[i].processor;
1245 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1246 error ("CPU you selected does not support x86-64 instruction set");
1249 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1250 x86_prefetch_sse = true;
1252 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1255 ix86_cost = &size_cost;
1257 ix86_cost = processor_target_table[ix86_tune].cost;
1258 target_flags |= processor_target_table[ix86_tune].target_enable;
1259 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1261 /* Arrange to set up i386_stack_locals for all functions. */
1262 init_machine_status = ix86_init_machine_status;
1264 /* Validate -mregparm= value. */
1265 if (ix86_regparm_string)
1267 i = atoi (ix86_regparm_string);
1268 if (i < 0 || i > REGPARM_MAX)
1269 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1275 ix86_regparm = REGPARM_MAX;
1277 /* If the user has provided any of the -malign-* options,
1278 warn and use that value only if -falign-* is not set.
1279 Remove this code in GCC 3.2 or later. */
1280 if (ix86_align_loops_string)
1282 warning ("-malign-loops is obsolete, use -falign-loops");
1283 if (align_loops == 0)
1285 i = atoi (ix86_align_loops_string);
1286 if (i < 0 || i > MAX_CODE_ALIGN)
1287 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1289 align_loops = 1 << i;
1293 if (ix86_align_jumps_string)
1295 warning ("-malign-jumps is obsolete, use -falign-jumps");
1296 if (align_jumps == 0)
1298 i = atoi (ix86_align_jumps_string);
1299 if (i < 0 || i > MAX_CODE_ALIGN)
1300 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1302 align_jumps = 1 << i;
1306 if (ix86_align_funcs_string)
1308 warning ("-malign-functions is obsolete, use -falign-functions");
1309 if (align_functions == 0)
1311 i = atoi (ix86_align_funcs_string);
1312 if (i < 0 || i > MAX_CODE_ALIGN)
1313 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1315 align_functions = 1 << i;
1319 /* Default align_* from the processor table. */
1320 if (align_loops == 0)
1322 align_loops = processor_target_table[ix86_tune].align_loop;
1323 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1325 if (align_jumps == 0)
1327 align_jumps = processor_target_table[ix86_tune].align_jump;
1328 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1330 if (align_functions == 0)
1332 align_functions = processor_target_table[ix86_tune].align_func;
1335 /* Validate -mpreferred-stack-boundary= value, or provide default.
1336 The default of 128 bits is for Pentium III's SSE __m128, but we
1337 don't want additional code to keep the stack aligned when
1338 optimizing for code size. */
1339 ix86_preferred_stack_boundary = (optimize_size
1340 ? TARGET_64BIT ? 128 : 32
1342 if (ix86_preferred_stack_boundary_string)
1344 i = atoi (ix86_preferred_stack_boundary_string);
1345 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1346 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1347 TARGET_64BIT ? 4 : 2);
1349 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1352 /* Validate -mbranch-cost= value, or provide default. */
1353 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1354 if (ix86_branch_cost_string)
1356 i = atoi (ix86_branch_cost_string);
1358 error ("-mbranch-cost=%d is not between 0 and 5", i);
1360 ix86_branch_cost = i;
1363 if (ix86_tls_dialect_string)
1365 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1366 ix86_tls_dialect = TLS_DIALECT_GNU;
1367 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1368 ix86_tls_dialect = TLS_DIALECT_SUN;
1370 error ("bad value (%s) for -mtls-dialect= switch",
1371 ix86_tls_dialect_string);
1374 /* Keep nonleaf frame pointers. */
1375 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1376 flag_omit_frame_pointer = 1;
1378 /* If we're doing fast math, we don't care about comparison order
1379 wrt NaNs. This lets us use a shorter comparison sequence. */
1380 if (flag_unsafe_math_optimizations)
1381 target_flags &= ~MASK_IEEE_FP;
1383 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1384 since the insns won't need emulation. */
1385 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1386 target_flags &= ~MASK_NO_FANCY_MATH_387;
1388 /* Turn on SSE2 builtins for -mpni. */
1390 target_flags |= MASK_SSE2;
1392 /* Turn on SSE builtins for -msse2. */
1394 target_flags |= MASK_SSE;
1398 if (TARGET_ALIGN_DOUBLE)
1399 error ("-malign-double makes no sense in the 64bit mode");
1401 error ("-mrtd calling convention not supported in the 64bit mode");
1402 /* Enable by default the SSE and MMX builtins. */
1403 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1404 ix86_fpmath = FPMATH_SSE;
1408 ix86_fpmath = FPMATH_387;
1409 /* i386 ABI does not specify red zone. It still makes sense to use it
1410 when programmer takes care to stack from being destroyed. */
1411 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1412 target_flags |= MASK_NO_RED_ZONE;
1415 if (ix86_fpmath_string != 0)
1417 if (! strcmp (ix86_fpmath_string, "387"))
1418 ix86_fpmath = FPMATH_387;
1419 else if (! strcmp (ix86_fpmath_string, "sse"))
1423 warning ("SSE instruction set disabled, using 387 arithmetics");
1424 ix86_fpmath = FPMATH_387;
1427 ix86_fpmath = FPMATH_SSE;
1429 else if (! strcmp (ix86_fpmath_string, "387,sse")
1430 || ! strcmp (ix86_fpmath_string, "sse,387"))
1434 warning ("SSE instruction set disabled, using 387 arithmetics");
1435 ix86_fpmath = FPMATH_387;
1437 else if (!TARGET_80387)
1439 warning ("387 instruction set disabled, using SSE arithmetics");
1440 ix86_fpmath = FPMATH_SSE;
1443 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1446 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1449 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1453 target_flags |= MASK_MMX;
1454 x86_prefetch_sse = true;
1457 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1460 target_flags |= MASK_MMX;
1461 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1462 extensions it adds. */
1463 if (x86_3dnow_a & (1 << ix86_arch))
1464 target_flags |= MASK_3DNOW_A;
1466 if ((x86_accumulate_outgoing_args & TUNEMASK)
1467 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1469 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1471 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1474 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1475 p = strchr (internal_label_prefix, 'X');
1476 internal_label_prefix_len = p - internal_label_prefix;
1482 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1484 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1485 make the problem with not enough registers even worse. */
1486 #ifdef INSN_SCHEDULING
1488 flag_schedule_insns = 0;
1491 /* The default values of these switches depend on the TARGET_64BIT
1492 that is not known at this moment. Mark these values with 2 and
1493 let user the to override these. In case there is no command line option
1494 specifying them, we will set the defaults in override_options. */
1496 flag_omit_frame_pointer = 2;
1497 flag_pcc_struct_return = 2;
1498 flag_asynchronous_unwind_tables = 2;
1501 /* Table of valid machine attributes. */
1502 const struct attribute_spec ix86_attribute_table[] =
1504 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1505 /* Stdcall attribute says callee is responsible for popping arguments
1506 if they are not variable. */
1507 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1508 /* Fastcall attribute says callee is responsible for popping arguments
1509 if they are not variable. */
1510 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1511 /* Cdecl attribute says the callee is a normal C declaration */
1512 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1513 /* Regparm attribute specifies how many integer arguments are to be
1514 passed in registers. */
1515 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1516 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1517 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1518 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1519 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1521 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1522 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1523 { NULL, 0, 0, false, false, false, NULL }
1526 /* Decide whether we can make a sibling call to a function. DECL is the
1527 declaration of the function being targeted by the call and EXP is the
1528 CALL_EXPR representing the call. */
1531 ix86_function_ok_for_sibcall (tree decl, tree exp)
1533 /* If we are generating position-independent code, we cannot sibcall
1534 optimize any indirect call, or a direct call to a global function,
1535 as the PLT requires %ebx be live. */
1536 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1539 /* If we are returning floats on the 80387 register stack, we cannot
1540 make a sibcall from a function that doesn't return a float to a
1541 function that does or, conversely, from a function that does return
1542 a float to a function that doesn't; the necessary stack adjustment
1543 would not be executed. */
1544 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1545 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1548 /* If this call is indirect, we'll need to be able to use a call-clobbered
1549 register for the address of the target function. Make sure that all
1550 such registers are not used for passing parameters. */
1551 if (!decl && !TARGET_64BIT)
1555 /* We're looking at the CALL_EXPR, we need the type of the function. */
1556 type = TREE_OPERAND (exp, 0); /* pointer expression */
1557 type = TREE_TYPE (type); /* pointer type */
1558 type = TREE_TYPE (type); /* function type */
1560 if (ix86_function_regparm (type, NULL) >= 3)
1562 /* ??? Need to count the actual number of registers to be used,
1563 not the possible number of registers. Fix later. */
1568 /* Otherwise okay. That also includes certain types of indirect calls. */
1572 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1573 arguments as in struct attribute_spec.handler. */
1575 ix86_handle_cdecl_attribute (tree *node, tree name,
1576 tree args ATTRIBUTE_UNUSED,
1577 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1579 if (TREE_CODE (*node) != FUNCTION_TYPE
1580 && TREE_CODE (*node) != METHOD_TYPE
1581 && TREE_CODE (*node) != FIELD_DECL
1582 && TREE_CODE (*node) != TYPE_DECL)
1584 warning ("`%s' attribute only applies to functions",
1585 IDENTIFIER_POINTER (name));
1586 *no_add_attrs = true;
1590 if (is_attribute_p ("fastcall", name))
1592 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1594 error ("fastcall and stdcall attributes are not compatible");
1596 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1598 error ("fastcall and regparm attributes are not compatible");
1601 else if (is_attribute_p ("stdcall", name))
1603 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1605 error ("fastcall and stdcall attributes are not compatible");
1612 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1613 *no_add_attrs = true;
1619 /* Handle a "regparm" attribute;
1620 arguments as in struct attribute_spec.handler. */
1622 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1623 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1625 if (TREE_CODE (*node) != FUNCTION_TYPE
1626 && TREE_CODE (*node) != METHOD_TYPE
1627 && TREE_CODE (*node) != FIELD_DECL
1628 && TREE_CODE (*node) != TYPE_DECL)
1630 warning ("`%s' attribute only applies to functions",
1631 IDENTIFIER_POINTER (name));
1632 *no_add_attrs = true;
1638 cst = TREE_VALUE (args);
1639 if (TREE_CODE (cst) != INTEGER_CST)
1641 warning ("`%s' attribute requires an integer constant argument",
1642 IDENTIFIER_POINTER (name));
1643 *no_add_attrs = true;
1645 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1647 warning ("argument to `%s' attribute larger than %d",
1648 IDENTIFIER_POINTER (name), REGPARM_MAX);
1649 *no_add_attrs = true;
1652 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1654 error ("fastcall and regparm attributes are not compatible");
1661 /* Return 0 if the attributes for two types are incompatible, 1 if they
1662 are compatible, and 2 if they are nearly compatible (which causes a
1663 warning to be generated). */
1666 ix86_comp_type_attributes (tree type1, tree type2)
1668 /* Check for mismatch of non-default calling convention. */
1669 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1671 if (TREE_CODE (type1) != FUNCTION_TYPE)
1674 /* Check for mismatched fastcall types */
1675 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1676 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1679 /* Check for mismatched return types (cdecl vs stdcall). */
1680 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1681 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1686 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1687 DECL may be NULL when calling function indirectly
1688 or considering a libcall. */
1691 ix86_function_regparm (tree type, tree decl)
1694 int regparm = ix86_regparm;
1695 bool user_convention = false;
1699 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1702 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1703 user_convention = true;
1706 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1709 user_convention = true;
1712 /* Use register calling convention for local functions when possible. */
1713 if (!TARGET_64BIT && !user_convention && decl
1714 && flag_unit_at_a_time && !profile_flag)
1716 struct cgraph_local_info *i = cgraph_local_info (decl);
1719 /* We can't use regparm(3) for nested functions as these use
1720 static chain pointer in third argument. */
1721 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1731 /* Return true if EAX is live at the start of the function. Used by
1732 ix86_expand_prologue to determine if we need special help before
1733 calling allocate_stack_worker. */
1736 ix86_eax_live_at_start_p (void)
1738 /* Cheat. Don't bother working forward from ix86_function_regparm
1739 to the function type to whether an actual argument is located in
1740 eax. Instead just look at cfg info, which is still close enough
1741 to correct at this point. This gives false positives for broken
1742 functions that might use uninitialized data that happens to be
1743 allocated in eax, but who cares? */
1744 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1747 /* Value is the number of bytes of arguments automatically
1748 popped when returning from a subroutine call.
1749 FUNDECL is the declaration node of the function (as a tree),
1750 FUNTYPE is the data type of the function (as a tree),
1751 or for a library call it is an identifier node for the subroutine name.
1752 SIZE is the number of bytes of arguments passed on the stack.
1754 On the 80386, the RTD insn may be used to pop them if the number
1755 of args is fixed, but if the number is variable then the caller
1756 must pop them all. RTD can't be used for library calls now
1757 because the library is compiled with the Unix compiler.
1758 Use of RTD is a selectable option, since it is incompatible with
1759 standard Unix calling sequences. If the option is not selected,
1760 the caller must always pop the args.
1762 The attribute stdcall is equivalent to RTD on a per module basis. */
1765 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1767 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1769 /* Cdecl functions override -mrtd, and never pop the stack. */
1770 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1772 /* Stdcall and fastcall functions will pop the stack if not
1774 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1775 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1779 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1780 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1781 == void_type_node)))
1785 /* Lose any fake structure return argument if it is passed on the stack. */
1786 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1789 int nregs = ix86_function_regparm (funtype, fundecl);
1792 return GET_MODE_SIZE (Pmode);
1798 /* Argument support functions. */
1800 /* Return true when register may be used to pass function parameters. */
1802 ix86_function_arg_regno_p (int regno)
1806 return (regno < REGPARM_MAX
1807 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1808 if (SSE_REGNO_P (regno) && TARGET_SSE)
1810 /* RAX is used as hidden argument to va_arg functions. */
1813 for (i = 0; i < REGPARM_MAX; i++)
1814 if (regno == x86_64_int_parameter_registers[i])
1819 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1820 for a call to a function whose data type is FNTYPE.
1821 For a library call, FNTYPE is 0. */
1824 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1825 tree fntype, /* tree ptr for function decl */
1826 rtx libname, /* SYMBOL_REF of library name or 0 */
1829 static CUMULATIVE_ARGS zero_cum;
1830 tree param, next_param;
1832 if (TARGET_DEBUG_ARG)
1834 fprintf (stderr, "\ninit_cumulative_args (");
1836 fprintf (stderr, "fntype code = %s, ret code = %s",
1837 tree_code_name[(int) TREE_CODE (fntype)],
1838 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1840 fprintf (stderr, "no fntype");
1843 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1848 /* Set up the number of registers to use for passing arguments. */
1850 cum->nregs = ix86_function_regparm (fntype, fndecl);
1852 cum->nregs = ix86_regparm;
1853 cum->sse_nregs = SSE_REGPARM_MAX;
1854 cum->mmx_nregs = MMX_REGPARM_MAX;
1855 cum->warn_sse = true;
1856 cum->warn_mmx = true;
1857 cum->maybe_vaarg = false;
1859 /* Use ecx and edx registers if function has fastcall attribute */
1860 if (fntype && !TARGET_64BIT)
1862 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1870 /* Determine if this function has variable arguments. This is
1871 indicated by the last argument being 'void_type_mode' if there
1872 are no variable arguments. If there are variable arguments, then
1873 we won't pass anything in registers */
1875 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1877 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1878 param != 0; param = next_param)
1880 next_param = TREE_CHAIN (param);
1881 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1892 cum->maybe_vaarg = true;
1896 if ((!fntype && !libname)
1897 || (fntype && !TYPE_ARG_TYPES (fntype)))
1898 cum->maybe_vaarg = 1;
1900 if (TARGET_DEBUG_ARG)
1901 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1906 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1907 of this code is to classify each 8bytes of incoming argument by the register
1908 class and assign registers accordingly. */
1910 /* Return the union class of CLASS1 and CLASS2.
1911 See the x86-64 PS ABI for details. */
1913 static enum x86_64_reg_class
1914 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1916 /* Rule #1: If both classes are equal, this is the resulting class. */
1917 if (class1 == class2)
1920 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1922 if (class1 == X86_64_NO_CLASS)
1924 if (class2 == X86_64_NO_CLASS)
1927 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1928 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1929 return X86_64_MEMORY_CLASS;
1931 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1932 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1933 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1934 return X86_64_INTEGERSI_CLASS;
1935 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1936 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1937 return X86_64_INTEGER_CLASS;
1939 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1940 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1941 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1942 return X86_64_MEMORY_CLASS;
1944 /* Rule #6: Otherwise class SSE is used. */
1945 return X86_64_SSE_CLASS;
1948 /* Classify the argument of type TYPE and mode MODE.
1949 CLASSES will be filled by the register class used to pass each word
1950 of the operand. The number of words is returned. In case the parameter
1951 should be passed in memory, 0 is returned. As a special case for zero
1952 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1954 BIT_OFFSET is used internally for handling records and specifies offset
1955 of the offset in bits modulo 256 to avoid overflow cases.
1957 See the x86-64 PS ABI for details.
1961 classify_argument (enum machine_mode mode, tree type,
1962 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1964 HOST_WIDE_INT bytes =
1965 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1966 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1968 /* Variable sized entities are always passed/returned in memory. */
1972 if (mode != VOIDmode
1973 && MUST_PASS_IN_STACK (mode, type))
1976 if (type && AGGREGATE_TYPE_P (type))
1980 enum x86_64_reg_class subclasses[MAX_CLASSES];
1982 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1986 for (i = 0; i < words; i++)
1987 classes[i] = X86_64_NO_CLASS;
1989 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1990 signalize memory class, so handle it as special case. */
1993 classes[0] = X86_64_NO_CLASS;
1997 /* Classify each field of record and merge classes. */
1998 if (TREE_CODE (type) == RECORD_TYPE)
2000 /* For classes first merge in the field of the subclasses. */
2001 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2003 tree bases = TYPE_BINFO_BASETYPES (type);
2004 int n_bases = TREE_VEC_LENGTH (bases);
2007 for (i = 0; i < n_bases; ++i)
2009 tree binfo = TREE_VEC_ELT (bases, i);
2011 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2012 tree type = BINFO_TYPE (binfo);
2014 num = classify_argument (TYPE_MODE (type),
2016 (offset + bit_offset) % 256);
2019 for (i = 0; i < num; i++)
2021 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2023 merge_classes (subclasses[i], classes[i + pos]);
2027 /* And now merge the fields of structure. */
2028 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2030 if (TREE_CODE (field) == FIELD_DECL)
2034 /* Bitfields are always classified as integer. Handle them
2035 early, since later code would consider them to be
2036 misaligned integers. */
2037 if (DECL_BIT_FIELD (field))
2039 for (i = int_bit_position (field) / 8 / 8;
2040 i < (int_bit_position (field)
2041 + tree_low_cst (DECL_SIZE (field), 0)
2044 merge_classes (X86_64_INTEGER_CLASS,
2049 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2050 TREE_TYPE (field), subclasses,
2051 (int_bit_position (field)
2052 + bit_offset) % 256);
2055 for (i = 0; i < num; i++)
2058 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2060 merge_classes (subclasses[i], classes[i + pos]);
2066 /* Arrays are handled as small records. */
2067 else if (TREE_CODE (type) == ARRAY_TYPE)
2070 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2071 TREE_TYPE (type), subclasses, bit_offset);
2075 /* The partial classes are now full classes. */
2076 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2077 subclasses[0] = X86_64_SSE_CLASS;
2078 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2079 subclasses[0] = X86_64_INTEGER_CLASS;
2081 for (i = 0; i < words; i++)
2082 classes[i] = subclasses[i % num];
2084 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2085 else if (TREE_CODE (type) == UNION_TYPE
2086 || TREE_CODE (type) == QUAL_UNION_TYPE)
2088 /* For classes first merge in the field of the subclasses. */
2089 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2091 tree bases = TYPE_BINFO_BASETYPES (type);
2092 int n_bases = TREE_VEC_LENGTH (bases);
2095 for (i = 0; i < n_bases; ++i)
2097 tree binfo = TREE_VEC_ELT (bases, i);
2099 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2100 tree type = BINFO_TYPE (binfo);
2102 num = classify_argument (TYPE_MODE (type),
2104 (offset + (bit_offset % 64)) % 256);
2107 for (i = 0; i < num; i++)
2109 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2111 merge_classes (subclasses[i], classes[i + pos]);
2115 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2117 if (TREE_CODE (field) == FIELD_DECL)
2120 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2121 TREE_TYPE (field), subclasses,
2125 for (i = 0; i < num; i++)
2126 classes[i] = merge_classes (subclasses[i], classes[i]);
2130 else if (TREE_CODE (type) == SET_TYPE)
2134 classes[0] = X86_64_INTEGERSI_CLASS;
2137 else if (bytes <= 8)
2139 classes[0] = X86_64_INTEGER_CLASS;
2142 else if (bytes <= 12)
2144 classes[0] = X86_64_INTEGER_CLASS;
2145 classes[1] = X86_64_INTEGERSI_CLASS;
2150 classes[0] = X86_64_INTEGER_CLASS;
2151 classes[1] = X86_64_INTEGER_CLASS;
2158 /* Final merger cleanup. */
2159 for (i = 0; i < words; i++)
2161 /* If one class is MEMORY, everything should be passed in
2163 if (classes[i] == X86_64_MEMORY_CLASS)
2166 /* The X86_64_SSEUP_CLASS should be always preceded by
2167 X86_64_SSE_CLASS. */
2168 if (classes[i] == X86_64_SSEUP_CLASS
2169 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2170 classes[i] = X86_64_SSE_CLASS;
2172 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2173 if (classes[i] == X86_64_X87UP_CLASS
2174 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2175 classes[i] = X86_64_SSE_CLASS;
2180 /* Compute alignment needed. We align all types to natural boundaries with
2181 exception of XFmode that is aligned to 64bits. */
2182 if (mode != VOIDmode && mode != BLKmode)
2184 int mode_alignment = GET_MODE_BITSIZE (mode);
2187 mode_alignment = 128;
2188 else if (mode == XCmode)
2189 mode_alignment = 256;
2190 /* Misaligned fields are always returned in memory. */
2191 if (bit_offset % mode_alignment)
2195 /* Classification of atomic types. */
2205 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2206 classes[0] = X86_64_INTEGERSI_CLASS;
2208 classes[0] = X86_64_INTEGER_CLASS;
2212 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2215 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2216 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2219 if (!(bit_offset % 64))
2220 classes[0] = X86_64_SSESF_CLASS;
2222 classes[0] = X86_64_SSE_CLASS;
2225 classes[0] = X86_64_SSEDF_CLASS;
2228 classes[0] = X86_64_X87_CLASS;
2229 classes[1] = X86_64_X87UP_CLASS;
2235 classes[0] = X86_64_X87_CLASS;
2236 classes[1] = X86_64_X87UP_CLASS;
2237 classes[2] = X86_64_X87_CLASS;
2238 classes[3] = X86_64_X87UP_CLASS;
2241 classes[0] = X86_64_SSEDF_CLASS;
2242 classes[1] = X86_64_SSEDF_CLASS;
2245 classes[0] = X86_64_SSE_CLASS;
2253 classes[0] = X86_64_SSE_CLASS;
2254 classes[1] = X86_64_SSEUP_CLASS;
2269 /* Examine the argument and return set number of register required in each
2270 class. Return 0 iff parameter should be passed in memory. */
2272 examine_argument (enum machine_mode mode, tree type, int in_return,
2273 int *int_nregs, int *sse_nregs)
2275 enum x86_64_reg_class class[MAX_CLASSES];
2276 int n = classify_argument (mode, type, class, 0);
2282 for (n--; n >= 0; n--)
2285 case X86_64_INTEGER_CLASS:
2286 case X86_64_INTEGERSI_CLASS:
2289 case X86_64_SSE_CLASS:
2290 case X86_64_SSESF_CLASS:
2291 case X86_64_SSEDF_CLASS:
2294 case X86_64_NO_CLASS:
2295 case X86_64_SSEUP_CLASS:
2297 case X86_64_X87_CLASS:
2298 case X86_64_X87UP_CLASS:
2302 case X86_64_MEMORY_CLASS:
2307 /* Construct container for the argument used by GCC interface. See
2308 FUNCTION_ARG for the detailed description. */
2310 construct_container (enum machine_mode mode, tree type, int in_return,
2311 int nintregs, int nsseregs, const int * intreg,
2314 enum machine_mode tmpmode;
2316 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2317 enum x86_64_reg_class class[MAX_CLASSES];
2321 int needed_sseregs, needed_intregs;
2322 rtx exp[MAX_CLASSES];
2325 n = classify_argument (mode, type, class, 0);
2326 if (TARGET_DEBUG_ARG)
2329 fprintf (stderr, "Memory class\n");
2332 fprintf (stderr, "Classes:");
2333 for (i = 0; i < n; i++)
2335 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2337 fprintf (stderr, "\n");
2342 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2344 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2347 /* First construct simple cases. Avoid SCmode, since we want to use
2348 single register to pass this type. */
2349 if (n == 1 && mode != SCmode)
2352 case X86_64_INTEGER_CLASS:
2353 case X86_64_INTEGERSI_CLASS:
2354 return gen_rtx_REG (mode, intreg[0]);
2355 case X86_64_SSE_CLASS:
2356 case X86_64_SSESF_CLASS:
2357 case X86_64_SSEDF_CLASS:
2358 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2359 case X86_64_X87_CLASS:
2360 return gen_rtx_REG (mode, FIRST_STACK_REG);
2361 case X86_64_NO_CLASS:
2362 /* Zero sized array, struct or class. */
2367 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2368 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2370 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2371 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2372 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2373 && class[1] == X86_64_INTEGER_CLASS
2374 && (mode == CDImode || mode == TImode || mode == TFmode)
2375 && intreg[0] + 1 == intreg[1])
2376 return gen_rtx_REG (mode, intreg[0]);
2378 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2379 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2380 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2382 /* Otherwise figure out the entries of the PARALLEL. */
2383 for (i = 0; i < n; i++)
2387 case X86_64_NO_CLASS:
2389 case X86_64_INTEGER_CLASS:
2390 case X86_64_INTEGERSI_CLASS:
2391 /* Merge TImodes on aligned occasions here too. */
2392 if (i * 8 + 8 > bytes)
2393 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2394 else if (class[i] == X86_64_INTEGERSI_CLASS)
2398 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2399 if (tmpmode == BLKmode)
2401 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2402 gen_rtx_REG (tmpmode, *intreg),
2406 case X86_64_SSESF_CLASS:
2407 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2408 gen_rtx_REG (SFmode,
2409 SSE_REGNO (sse_regno)),
2413 case X86_64_SSEDF_CLASS:
2414 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2415 gen_rtx_REG (DFmode,
2416 SSE_REGNO (sse_regno)),
2420 case X86_64_SSE_CLASS:
2421 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2425 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2426 gen_rtx_REG (tmpmode,
2427 SSE_REGNO (sse_regno)),
2429 if (tmpmode == TImode)
2437 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2438 for (i = 0; i < nexps; i++)
2439 XVECEXP (ret, 0, i) = exp [i];
2443 /* Update the data in CUM to advance over an argument
2444 of mode MODE and data type TYPE.
2445 (TYPE is null for libcalls where that information may not be available.) */
2448 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2449 enum machine_mode mode, /* current arg mode */
2450 tree type, /* type of the argument or 0 if lib support */
2451 int named) /* whether or not the argument was named */
2454 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2455 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2457 if (TARGET_DEBUG_ARG)
2459 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2460 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2463 int int_nregs, sse_nregs;
2464 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2465 cum->words += words;
2466 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2468 cum->nregs -= int_nregs;
2469 cum->sse_nregs -= sse_nregs;
2470 cum->regno += int_nregs;
2471 cum->sse_regno += sse_nregs;
2474 cum->words += words;
2478 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2479 && (!type || !AGGREGATE_TYPE_P (type)))
2481 cum->sse_words += words;
2482 cum->sse_nregs -= 1;
2483 cum->sse_regno += 1;
2484 if (cum->sse_nregs <= 0)
2490 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2491 && (!type || !AGGREGATE_TYPE_P (type)))
2493 cum->mmx_words += words;
2494 cum->mmx_nregs -= 1;
2495 cum->mmx_regno += 1;
2496 if (cum->mmx_nregs <= 0)
2504 cum->words += words;
2505 cum->nregs -= words;
2506 cum->regno += words;
2508 if (cum->nregs <= 0)
2518 /* Define where to put the arguments to a function.
2519 Value is zero to push the argument on the stack,
2520 or a hard register in which to store the argument.
2522 MODE is the argument's machine mode.
2523 TYPE is the data type of the argument (as a tree).
2524 This is null for libcalls where that information may
2526 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2527 the preceding args and about the function being called.
2528 NAMED is nonzero if this argument is a named parameter
2529 (otherwise it is an extra parameter matching an ellipsis). */
2532 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2533 enum machine_mode mode, /* current arg mode */
2534 tree type, /* type of the argument or 0 if lib support */
2535 int named) /* != 0 for normal args, == 0 for ... args */
2539 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2540 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2541 static bool warnedsse, warnedmmx;
2543 /* Handle a hidden AL argument containing number of registers for varargs
2544 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2546 if (mode == VOIDmode)
2549 return GEN_INT (cum->maybe_vaarg
2550 ? (cum->sse_nregs < 0
2558 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2559 &x86_64_int_parameter_registers [cum->regno],
2564 /* For now, pass fp/complex values on the stack. */
2576 if (words <= cum->nregs)
2578 int regno = cum->regno;
2580 /* Fastcall allocates the first two DWORD (SImode) or
2581 smaller arguments to ECX and EDX. */
2584 if (mode == BLKmode || mode == DImode)
2587 /* ECX not EAX is the first allocated register. */
2591 ret = gen_rtx_REG (mode, regno);
2601 if (!type || !AGGREGATE_TYPE_P (type))
2603 if (!TARGET_SSE && !warnedmmx && cum->warn_sse)
2606 warning ("SSE vector argument without SSE enabled "
2610 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2617 if (!type || !AGGREGATE_TYPE_P (type))
2619 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2622 warning ("MMX vector argument without MMX enabled "
2626 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2631 if (TARGET_DEBUG_ARG)
2634 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2635 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2638 print_simple_rtl (stderr, ret);
2640 fprintf (stderr, ", stack");
2642 fprintf (stderr, " )\n");
2648 /* A C expression that indicates when an argument must be passed by
2649 reference. If nonzero for an argument, a copy of that argument is
2650 made in memory and a pointer to the argument is passed instead of
2651 the argument itself. The pointer is passed in whatever way is
2652 appropriate for passing a pointer to that type. */
2655 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2656 enum machine_mode mode ATTRIBUTE_UNUSED,
2657 tree type, int named ATTRIBUTE_UNUSED)
2662 if (type && int_size_in_bytes (type) == -1)
2664 if (TARGET_DEBUG_ARG)
2665 fprintf (stderr, "function_arg_pass_by_reference\n");
2672 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2675 contains_128bit_aligned_vector_p (tree type)
2677 enum machine_mode mode = TYPE_MODE (type);
2678 if (SSE_REG_MODE_P (mode)
2679 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2681 if (TYPE_ALIGN (type) < 128)
2684 if (AGGREGATE_TYPE_P (type))
2686 /* Walk the aggregates recursively. */
2687 if (TREE_CODE (type) == RECORD_TYPE
2688 || TREE_CODE (type) == UNION_TYPE
2689 || TREE_CODE (type) == QUAL_UNION_TYPE)
2693 if (TYPE_BINFO (type) != NULL
2694 && TYPE_BINFO_BASETYPES (type) != NULL)
2696 tree bases = TYPE_BINFO_BASETYPES (type);
2697 int n_bases = TREE_VEC_LENGTH (bases);
2700 for (i = 0; i < n_bases; ++i)
2702 tree binfo = TREE_VEC_ELT (bases, i);
2703 tree type = BINFO_TYPE (binfo);
2705 if (contains_128bit_aligned_vector_p (type))
2709 /* And now merge the fields of structure. */
2710 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2712 if (TREE_CODE (field) == FIELD_DECL
2713 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2717 /* Just for use if some languages passes arrays by value. */
2718 else if (TREE_CODE (type) == ARRAY_TYPE)
2720 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2729 /* Gives the alignment boundary, in bits, of an argument with the
2730 specified mode and type. */
2733 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2737 align = TYPE_ALIGN (type);
2739 align = GET_MODE_ALIGNMENT (mode);
2740 if (align < PARM_BOUNDARY)
2741 align = PARM_BOUNDARY;
2744 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2745 make an exception for SSE modes since these require 128bit
2748 The handling here differs from field_alignment. ICC aligns MMX
2749 arguments to 4 byte boundaries, while structure fields are aligned
2750 to 8 byte boundaries. */
2753 if (!SSE_REG_MODE_P (mode))
2754 align = PARM_BOUNDARY;
2758 if (!contains_128bit_aligned_vector_p (type))
2759 align = PARM_BOUNDARY;
2767 /* Return true if N is a possible register number of function value. */
2769 ix86_function_value_regno_p (int regno)
2773 return ((regno) == 0
2774 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2775 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2777 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2778 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2779 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2782 /* Define how to find the value returned by a function.
2783 VALTYPE is the data type of the value (as a tree).
2784 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2785 otherwise, FUNC is 0. */
2787 ix86_function_value (tree valtype)
2791 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2792 REGPARM_MAX, SSE_REGPARM_MAX,
2793 x86_64_int_return_registers, 0);
2794 /* For zero sized structures, construct_container return NULL, but we need
2795 to keep rest of compiler happy by returning meaningful value. */
2797 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2801 return gen_rtx_REG (TYPE_MODE (valtype),
2802 ix86_value_regno (TYPE_MODE (valtype)));
2805 /* Return false iff type is returned in memory. */
2807 ix86_return_in_memory (tree type)
2809 int needed_intregs, needed_sseregs, size;
2810 enum machine_mode mode = TYPE_MODE (type);
2813 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2815 if (mode == BLKmode)
2818 size = int_size_in_bytes (type);
2820 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2823 if (VECTOR_MODE_P (mode) || mode == TImode)
2825 /* User-created vectors small enough to fit in EAX. */
2829 /* MMX/3dNow values are returned on the stack, since we've
2830 got to EMMS/FEMMS before returning. */
2834 /* SSE values are returned in XMM0. */
2835 /* ??? Except when it doesn't exist? We have a choice of
2836 either (1) being abi incompatible with a -march switch,
2837 or (2) generating an error here. Given no good solution,
2838 I think the safest thing is one warning. The user won't
2839 be able to use -Werror, but.... */
2850 warning ("SSE vector return without SSE enabled "
2865 /* Define how to find the value returned by a library function
2866 assuming the value has mode MODE. */
2868 ix86_libcall_value (enum machine_mode mode)
2878 return gen_rtx_REG (mode, FIRST_SSE_REG);
2881 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2886 return gen_rtx_REG (mode, 0);
2890 return gen_rtx_REG (mode, ix86_value_regno (mode));
2893 /* Given a mode, return the register to use for a return value. */
2896 ix86_value_regno (enum machine_mode mode)
2898 /* Floating point return values in %st(0). */
2899 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2900 return FIRST_FLOAT_REG;
2901 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2902 we prevent this case when sse is not available. */
2903 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2904 return FIRST_SSE_REG;
2905 /* Everything else in %eax. */
2909 /* Create the va_list data type. */
2912 ix86_build_builtin_va_list (void)
2914 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2916 /* For i386 we use plain pointer to argument area. */
2918 return build_pointer_type (char_type_node);
2920 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2921 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2923 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2924 unsigned_type_node);
2925 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2926 unsigned_type_node);
2927 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2929 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2932 DECL_FIELD_CONTEXT (f_gpr) = record;
2933 DECL_FIELD_CONTEXT (f_fpr) = record;
2934 DECL_FIELD_CONTEXT (f_ovf) = record;
2935 DECL_FIELD_CONTEXT (f_sav) = record;
2937 TREE_CHAIN (record) = type_decl;
2938 TYPE_NAME (record) = type_decl;
2939 TYPE_FIELDS (record) = f_gpr;
2940 TREE_CHAIN (f_gpr) = f_fpr;
2941 TREE_CHAIN (f_fpr) = f_ovf;
2942 TREE_CHAIN (f_ovf) = f_sav;
2944 layout_type (record);
2946 /* The correct type is an array type of one element. */
2947 return build_array_type (record, build_index_type (size_zero_node));
2950 /* Perform any needed actions needed for a function that is receiving a
2951 variable number of arguments.
2955 MODE and TYPE are the mode and type of the current parameter.
2957 PRETEND_SIZE is a variable that should be set to the amount of stack
2958 that must be pushed by the prolog to pretend that our caller pushed
2961 Normally, this macro will push all remaining incoming registers on the
2962 stack and set PRETEND_SIZE to the length of the registers pushed. */
2965 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2966 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2969 CUMULATIVE_ARGS next_cum;
2970 rtx save_area = NULL_RTX, mem;
2983 /* Indicate to allocate space on the stack for varargs save area. */
2984 ix86_save_varrargs_registers = 1;
2986 cfun->stack_alignment_needed = 128;
2988 fntype = TREE_TYPE (current_function_decl);
2989 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2990 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2991 != void_type_node));
2993 /* For varargs, we do not want to skip the dummy va_dcl argument.
2994 For stdargs, we do want to skip the last named argument. */
2997 function_arg_advance (&next_cum, mode, type, 1);
3000 save_area = frame_pointer_rtx;
3002 set = get_varargs_alias_set ();
3004 for (i = next_cum.regno; i < ix86_regparm; i++)
3006 mem = gen_rtx_MEM (Pmode,
3007 plus_constant (save_area, i * UNITS_PER_WORD));
3008 set_mem_alias_set (mem, set);
3009 emit_move_insn (mem, gen_rtx_REG (Pmode,
3010 x86_64_int_parameter_registers[i]));
3013 if (next_cum.sse_nregs)
3015 /* Now emit code to save SSE registers. The AX parameter contains number
3016 of SSE parameter registers used to call this function. We use
3017 sse_prologue_save insn template that produces computed jump across
3018 SSE saves. We need some preparation work to get this working. */
3020 label = gen_label_rtx ();
3021 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3023 /* Compute address to jump to :
3024 label - 5*eax + nnamed_sse_arguments*5 */
3025 tmp_reg = gen_reg_rtx (Pmode);
3026 nsse_reg = gen_reg_rtx (Pmode);
3027 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3028 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3029 gen_rtx_MULT (Pmode, nsse_reg,
3031 if (next_cum.sse_regno)
3034 gen_rtx_CONST (DImode,
3035 gen_rtx_PLUS (DImode,
3037 GEN_INT (next_cum.sse_regno * 4))));
3039 emit_move_insn (nsse_reg, label_ref);
3040 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3042 /* Compute address of memory block we save into. We always use pointer
3043 pointing 127 bytes after first byte to store - this is needed to keep
3044 instruction size limited by 4 bytes. */
3045 tmp_reg = gen_reg_rtx (Pmode);
3046 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3047 plus_constant (save_area,
3048 8 * REGPARM_MAX + 127)));
3049 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3050 set_mem_alias_set (mem, set);
3051 set_mem_align (mem, BITS_PER_WORD);
3053 /* And finally do the dirty job! */
3054 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3055 GEN_INT (next_cum.sse_regno), label));
3060 /* Implement va_start. */
3063 ix86_va_start (tree valist, rtx nextarg)
3065 HOST_WIDE_INT words, n_gpr, n_fpr;
3066 tree f_gpr, f_fpr, f_ovf, f_sav;
3067 tree gpr, fpr, ovf, sav, t;
3069 /* Only 64bit target needs something special. */
3072 std_expand_builtin_va_start (valist, nextarg);
3076 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3077 f_fpr = TREE_CHAIN (f_gpr);
3078 f_ovf = TREE_CHAIN (f_fpr);
3079 f_sav = TREE_CHAIN (f_ovf);
3081 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3082 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3083 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3084 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3085 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3087 /* Count number of gp and fp argument registers used. */
3088 words = current_function_args_info.words;
3089 n_gpr = current_function_args_info.regno;
3090 n_fpr = current_function_args_info.sse_regno;
3092 if (TARGET_DEBUG_ARG)
3093 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3094 (int) words, (int) n_gpr, (int) n_fpr);
3096 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3097 build_int_2 (n_gpr * 8, 0));
3098 TREE_SIDE_EFFECTS (t) = 1;
3099 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3101 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3102 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3103 TREE_SIDE_EFFECTS (t) = 1;
3104 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3106 /* Find the overflow area. */
3107 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3109 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3110 build_int_2 (words * UNITS_PER_WORD, 0));
3111 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3112 TREE_SIDE_EFFECTS (t) = 1;
3113 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3115 /* Find the register save area.
3116 Prologue of the function save it right above stack frame. */
3117 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3118 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3119 TREE_SIDE_EFFECTS (t) = 1;
3120 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3123 /* Implement va_arg. */
3125 ix86_va_arg (tree valist, tree type)
3127 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3128 tree f_gpr, f_fpr, f_ovf, f_sav;
3129 tree gpr, fpr, ovf, sav, t;
3131 rtx lab_false, lab_over = NULL_RTX;
3136 /* Only 64bit target needs something special. */
3139 return std_expand_builtin_va_arg (valist, type);
3142 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3143 f_fpr = TREE_CHAIN (f_gpr);
3144 f_ovf = TREE_CHAIN (f_fpr);
3145 f_sav = TREE_CHAIN (f_ovf);
3147 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3148 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3149 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3150 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3151 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3153 size = int_size_in_bytes (type);
3156 /* Passed by reference. */
3158 type = build_pointer_type (type);
3159 size = int_size_in_bytes (type);
3161 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3163 container = construct_container (TYPE_MODE (type), type, 0,
3164 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3166 * Pull the value out of the saved registers ...
3169 addr_rtx = gen_reg_rtx (Pmode);
3173 rtx int_addr_rtx, sse_addr_rtx;
3174 int needed_intregs, needed_sseregs;
3177 lab_over = gen_label_rtx ();
3178 lab_false = gen_label_rtx ();
3180 examine_argument (TYPE_MODE (type), type, 0,
3181 &needed_intregs, &needed_sseregs);
3184 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3185 || TYPE_ALIGN (type) > 128);
3187 /* In case we are passing structure, verify that it is consecutive block
3188 on the register save area. If not we need to do moves. */
3189 if (!need_temp && !REG_P (container))
3191 /* Verify that all registers are strictly consecutive */
3192 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3196 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3198 rtx slot = XVECEXP (container, 0, i);
3199 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3200 || INTVAL (XEXP (slot, 1)) != i * 16)
3208 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3210 rtx slot = XVECEXP (container, 0, i);
3211 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3212 || INTVAL (XEXP (slot, 1)) != i * 8)
3219 int_addr_rtx = addr_rtx;
3220 sse_addr_rtx = addr_rtx;
3224 int_addr_rtx = gen_reg_rtx (Pmode);
3225 sse_addr_rtx = gen_reg_rtx (Pmode);
3227 /* First ensure that we fit completely in registers. */
3230 emit_cmp_and_jump_insns (expand_expr
3231 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3232 GEN_INT ((REGPARM_MAX - needed_intregs +
3233 1) * 8), GE, const1_rtx, SImode,
3238 emit_cmp_and_jump_insns (expand_expr
3239 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3240 GEN_INT ((SSE_REGPARM_MAX -
3241 needed_sseregs + 1) * 16 +
3242 REGPARM_MAX * 8), GE, const1_rtx,
3243 SImode, 1, lab_false);
3246 /* Compute index to start of area used for integer regs. */
3249 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3250 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3251 if (r != int_addr_rtx)
3252 emit_move_insn (int_addr_rtx, r);
3256 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3257 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3258 if (r != sse_addr_rtx)
3259 emit_move_insn (sse_addr_rtx, r);
3267 /* Never use the memory itself, as it has the alias set. */
3268 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3269 mem = gen_rtx_MEM (BLKmode, x);
3270 force_operand (x, addr_rtx);
3271 set_mem_alias_set (mem, get_varargs_alias_set ());
3272 set_mem_align (mem, BITS_PER_UNIT);
3274 for (i = 0; i < XVECLEN (container, 0); i++)
3276 rtx slot = XVECEXP (container, 0, i);
3277 rtx reg = XEXP (slot, 0);
3278 enum machine_mode mode = GET_MODE (reg);
3284 if (SSE_REGNO_P (REGNO (reg)))
3286 src_addr = sse_addr_rtx;
3287 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3291 src_addr = int_addr_rtx;
3292 src_offset = REGNO (reg) * 8;
3294 src_mem = gen_rtx_MEM (mode, src_addr);
3295 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3296 src_mem = adjust_address (src_mem, mode, src_offset);
3297 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3298 emit_move_insn (dest_mem, src_mem);
3305 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3306 build_int_2 (needed_intregs * 8, 0));
3307 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3308 TREE_SIDE_EFFECTS (t) = 1;
3309 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3314 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3315 build_int_2 (needed_sseregs * 16, 0));
3316 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3317 TREE_SIDE_EFFECTS (t) = 1;
3318 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3321 emit_jump_insn (gen_jump (lab_over));
3323 emit_label (lab_false);
3326 /* ... otherwise out of the overflow area. */
3328 /* Care for on-stack alignment if needed. */
3329 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3333 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3334 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3335 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3339 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3341 emit_move_insn (addr_rtx, r);
3344 build (PLUS_EXPR, TREE_TYPE (t), t,
3345 build_int_2 (rsize * UNITS_PER_WORD, 0));
3346 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3347 TREE_SIDE_EFFECTS (t) = 1;
3348 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3351 emit_label (lab_over);
3355 r = gen_rtx_MEM (Pmode, addr_rtx);
3356 set_mem_alias_set (r, get_varargs_alias_set ());
3357 emit_move_insn (addr_rtx, r);
3363 /* Return nonzero if OP is either a i387 or SSE fp register. */
3365 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3367 return ANY_FP_REG_P (op);
3370 /* Return nonzero if OP is an i387 fp register. */
3372 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3374 return FP_REG_P (op);
3377 /* Return nonzero if OP is a non-fp register_operand. */
3379 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3381 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3384 /* Return nonzero if OP is a register operand other than an
3385 i387 fp register. */
3387 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3389 return register_operand (op, mode) && !FP_REG_P (op);
3392 /* Return nonzero if OP is general operand representable on x86_64. */
3395 x86_64_general_operand (rtx op, enum machine_mode mode)
3398 return general_operand (op, mode);
3399 if (nonimmediate_operand (op, mode))
3401 return x86_64_sign_extended_value (op);
3404 /* Return nonzero if OP is general operand representable on x86_64
3405 as either sign extended or zero extended constant. */
3408 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3411 return general_operand (op, mode);
3412 if (nonimmediate_operand (op, mode))
3414 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3417 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3420 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3423 return nonmemory_operand (op, mode);
3424 if (register_operand (op, mode))
3426 return x86_64_sign_extended_value (op);
3429 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3432 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3434 if (!TARGET_64BIT || !flag_pic)
3435 return nonmemory_operand (op, mode);
3436 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3438 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3443 /* Return nonzero if OPNUM's MEM should be matched
3444 in movabs* patterns. */
3447 ix86_check_movabs (rtx insn, int opnum)
3451 set = PATTERN (insn);
3452 if (GET_CODE (set) == PARALLEL)
3453 set = XVECEXP (set, 0, 0);
3454 if (GET_CODE (set) != SET)
3456 mem = XEXP (set, opnum);
3457 while (GET_CODE (mem) == SUBREG)
3458 mem = SUBREG_REG (mem);
3459 if (GET_CODE (mem) != MEM)
3461 return (volatile_ok || !MEM_VOLATILE_P (mem));
3464 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3467 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3470 return nonmemory_operand (op, mode);
3471 if (register_operand (op, mode))
3473 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3476 /* Return nonzero if OP is immediate operand representable on x86_64. */
3479 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3482 return immediate_operand (op, mode);
3483 return x86_64_sign_extended_value (op);
3486 /* Return nonzero if OP is immediate operand representable on x86_64. */
3489 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3491 return x86_64_zero_extended_value (op);
3494 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3495 for shift & compare patterns, as shifting by 0 does not change flags),
3496 else return zero. */
3499 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3501 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3504 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3505 reference and a constant. */
3508 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3510 switch (GET_CODE (op))
3518 if (GET_CODE (op) == SYMBOL_REF
3519 || GET_CODE (op) == LABEL_REF
3520 || (GET_CODE (op) == UNSPEC
3521 && (XINT (op, 1) == UNSPEC_GOT
3522 || XINT (op, 1) == UNSPEC_GOTOFF
3523 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3525 if (GET_CODE (op) != PLUS
3526 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3530 if (GET_CODE (op) == SYMBOL_REF
3531 || GET_CODE (op) == LABEL_REF)
3533 /* Only @GOTOFF gets offsets. */
3534 if (GET_CODE (op) != UNSPEC
3535 || XINT (op, 1) != UNSPEC_GOTOFF)
3538 op = XVECEXP (op, 0, 0);
3539 if (GET_CODE (op) == SYMBOL_REF
3540 || GET_CODE (op) == LABEL_REF)
3549 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3552 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3554 if (GET_CODE (op) != CONST)
3559 if (GET_CODE (op) == UNSPEC
3560 && XINT (op, 1) == UNSPEC_GOTPCREL)
3562 if (GET_CODE (op) == PLUS
3563 && GET_CODE (XEXP (op, 0)) == UNSPEC
3564 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3569 if (GET_CODE (op) == UNSPEC)
3571 if (GET_CODE (op) != PLUS
3572 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3575 if (GET_CODE (op) == UNSPEC)
3581 /* Return true if OP is a symbolic operand that resolves locally. */
3584 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3586 if (GET_CODE (op) == CONST
3587 && GET_CODE (XEXP (op, 0)) == PLUS
3588 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3589 op = XEXP (XEXP (op, 0), 0);
3591 if (GET_CODE (op) == LABEL_REF)
3594 if (GET_CODE (op) != SYMBOL_REF)
3597 if (SYMBOL_REF_LOCAL_P (op))
3600 /* There is, however, a not insubstantial body of code in the rest of
3601 the compiler that assumes it can just stick the results of
3602 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3603 /* ??? This is a hack. Should update the body of the compiler to
3604 always create a DECL an invoke targetm.encode_section_info. */
3605 if (strncmp (XSTR (op, 0), internal_label_prefix,
3606 internal_label_prefix_len) == 0)
3612 /* Test for various thread-local symbols. */
3615 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3617 if (GET_CODE (op) != SYMBOL_REF)
3619 return SYMBOL_REF_TLS_MODEL (op);
3623 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3625 if (GET_CODE (op) != SYMBOL_REF)
3627 return SYMBOL_REF_TLS_MODEL (op) == kind;
3631 global_dynamic_symbolic_operand (rtx op,
3632 enum machine_mode mode ATTRIBUTE_UNUSED)
3634 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3638 local_dynamic_symbolic_operand (rtx op,
3639 enum machine_mode mode ATTRIBUTE_UNUSED)
3641 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3645 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3647 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3651 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3653 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3656 /* Test for a valid operand for a call instruction. Don't allow the
3657 arg pointer register or virtual regs since they may decay into
3658 reg + const, which the patterns can't handle. */
3661 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3663 /* Disallow indirect through a virtual register. This leads to
3664 compiler aborts when trying to eliminate them. */
3665 if (GET_CODE (op) == REG
3666 && (op == arg_pointer_rtx
3667 || op == frame_pointer_rtx
3668 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3669 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3672 /* Disallow `call 1234'. Due to varying assembler lameness this
3673 gets either rejected or translated to `call .+1234'. */
3674 if (GET_CODE (op) == CONST_INT)
3677 /* Explicitly allow SYMBOL_REF even if pic. */
3678 if (GET_CODE (op) == SYMBOL_REF)
3681 /* Otherwise we can allow any general_operand in the address. */
3682 return general_operand (op, Pmode);
3685 /* Test for a valid operand for a call instruction. Don't allow the
3686 arg pointer register or virtual regs since they may decay into
3687 reg + const, which the patterns can't handle. */
3690 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3692 /* Disallow indirect through a virtual register. This leads to
3693 compiler aborts when trying to eliminate them. */
3694 if (GET_CODE (op) == REG
3695 && (op == arg_pointer_rtx
3696 || op == frame_pointer_rtx
3697 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3698 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3701 /* Explicitly allow SYMBOL_REF even if pic. */
3702 if (GET_CODE (op) == SYMBOL_REF)
3705 /* Otherwise we can only allow register operands. */
3706 return register_operand (op, Pmode);
3710 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3712 if (GET_CODE (op) == CONST
3713 && GET_CODE (XEXP (op, 0)) == PLUS
3714 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3715 op = XEXP (XEXP (op, 0), 0);
3716 return GET_CODE (op) == SYMBOL_REF;
3719 /* Match exactly zero and one. */
3722 const0_operand (rtx op, enum machine_mode mode)
3724 return op == CONST0_RTX (mode);
3728 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3730 return op == const1_rtx;
3733 /* Match 2, 4, or 8. Used for leal multiplicands. */
3736 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3738 return (GET_CODE (op) == CONST_INT
3739 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3743 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3745 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3749 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3751 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3755 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3757 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3761 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3763 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3767 /* True if this is a constant appropriate for an increment or decrement. */
3770 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3772 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3773 registers, since carry flag is not set. */
3774 if (TARGET_PENTIUM4 && !optimize_size)
3776 return op == const1_rtx || op == constm1_rtx;
3779 /* Return nonzero if OP is acceptable as operand of DImode shift
3783 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3786 return nonimmediate_operand (op, mode);
3788 return register_operand (op, mode);
3791 /* Return false if this is the stack pointer, or any other fake
3792 register eliminable to the stack pointer. Otherwise, this is
3795 This is used to prevent esp from being used as an index reg.
3796 Which would only happen in pathological cases. */
3799 reg_no_sp_operand (rtx op, enum machine_mode mode)
3802 if (GET_CODE (t) == SUBREG)
3804 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3807 return register_operand (op, mode);
3811 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3813 return MMX_REG_P (op);
3816 /* Return false if this is any eliminable register. Otherwise
3820 general_no_elim_operand (rtx op, enum machine_mode mode)
3823 if (GET_CODE (t) == SUBREG)
3825 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3826 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3827 || t == virtual_stack_dynamic_rtx)
3830 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3831 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3834 return general_operand (op, mode);
3837 /* Return false if this is any eliminable register. Otherwise
3838 register_operand or const_int. */
3841 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3844 if (GET_CODE (t) == SUBREG)
3846 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3847 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3848 || t == virtual_stack_dynamic_rtx)
3851 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3854 /* Return false if this is any eliminable register or stack register,
3855 otherwise work like register_operand. */
3858 index_register_operand (rtx op, enum machine_mode mode)
3861 if (GET_CODE (t) == SUBREG)
3865 if (t == arg_pointer_rtx
3866 || t == frame_pointer_rtx
3867 || t == virtual_incoming_args_rtx
3868 || t == virtual_stack_vars_rtx
3869 || t == virtual_stack_dynamic_rtx
3870 || REGNO (t) == STACK_POINTER_REGNUM)
3873 return general_operand (op, mode);
3876 /* Return true if op is a Q_REGS class register. */
3879 q_regs_operand (rtx op, enum machine_mode mode)
3881 if (mode != VOIDmode && GET_MODE (op) != mode)
3883 if (GET_CODE (op) == SUBREG)
3884 op = SUBREG_REG (op);
3885 return ANY_QI_REG_P (op);
3888 /* Return true if op is an flags register. */
3891 flags_reg_operand (rtx op, enum machine_mode mode)
3893 if (mode != VOIDmode && GET_MODE (op) != mode)
3895 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3898 /* Return true if op is a NON_Q_REGS class register. */
3901 non_q_regs_operand (rtx op, enum machine_mode mode)
3903 if (mode != VOIDmode && GET_MODE (op) != mode)
3905 if (GET_CODE (op) == SUBREG)
3906 op = SUBREG_REG (op);
3907 return NON_QI_REG_P (op);
3911 zero_extended_scalar_load_operand (rtx op,
3912 enum machine_mode mode ATTRIBUTE_UNUSED)
3915 if (GET_CODE (op) != MEM)
3917 op = maybe_get_pool_constant (op);
3920 if (GET_CODE (op) != CONST_VECTOR)
3923 (GET_MODE_SIZE (GET_MODE (op)) /
3924 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3925 for (n_elts--; n_elts > 0; n_elts--)
3927 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3928 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3934 /* Return 1 when OP is operand acceptable for standard SSE move. */
3936 vector_move_operand (rtx op, enum machine_mode mode)
3938 if (nonimmediate_operand (op, mode))
3940 if (GET_MODE (op) != mode && mode != VOIDmode)
3942 return (op == CONST0_RTX (GET_MODE (op)));
3945 /* Return true if op if a valid address, and does not contain
3946 a segment override. */
3949 no_seg_address_operand (rtx op, enum machine_mode mode)
3951 struct ix86_address parts;
3953 if (! address_operand (op, mode))
3956 if (! ix86_decompose_address (op, &parts))
3959 return parts.seg == SEG_DEFAULT;
3962 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3965 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3967 enum rtx_code code = GET_CODE (op);
3970 /* Operations supported directly. */
3980 /* These are equivalent to ones above in non-IEEE comparisons. */
3987 return !TARGET_IEEE_FP;
3992 /* Return 1 if OP is a valid comparison operator in valid mode. */
3994 ix86_comparison_operator (rtx op, enum machine_mode mode)
3996 enum machine_mode inmode;
3997 enum rtx_code code = GET_CODE (op);
3998 if (mode != VOIDmode && GET_MODE (op) != mode)
4000 if (GET_RTX_CLASS (code) != '<')
4002 inmode = GET_MODE (XEXP (op, 0));
4004 if (inmode == CCFPmode || inmode == CCFPUmode)
4006 enum rtx_code second_code, bypass_code;
4007 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4008 return (bypass_code == NIL && second_code == NIL);
4015 if (inmode == CCmode || inmode == CCGCmode
4016 || inmode == CCGOCmode || inmode == CCNOmode)
4019 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4020 if (inmode == CCmode)
4024 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4032 /* Return 1 if OP is a valid comparison operator testing carry flag
4035 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4037 enum machine_mode inmode;
4038 enum rtx_code code = GET_CODE (op);
4040 if (mode != VOIDmode && GET_MODE (op) != mode)
4042 if (GET_RTX_CLASS (code) != '<')
4044 inmode = GET_MODE (XEXP (op, 0));
4045 if (GET_CODE (XEXP (op, 0)) != REG
4046 || REGNO (XEXP (op, 0)) != 17
4047 || XEXP (op, 1) != const0_rtx)
4050 if (inmode == CCFPmode || inmode == CCFPUmode)
4052 enum rtx_code second_code, bypass_code;
4054 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4055 if (bypass_code != NIL || second_code != NIL)
4057 code = ix86_fp_compare_code_to_integer (code);
4059 else if (inmode != CCmode)
4064 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4067 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4069 enum machine_mode inmode;
4070 enum rtx_code code = GET_CODE (op);
4072 if (mode != VOIDmode && GET_MODE (op) != mode)
4074 if (GET_RTX_CLASS (code) != '<')
4076 inmode = GET_MODE (XEXP (op, 0));
4077 if (inmode == CCFPmode || inmode == CCFPUmode)
4079 enum rtx_code second_code, bypass_code;
4081 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4082 if (bypass_code != NIL || second_code != NIL)
4084 code = ix86_fp_compare_code_to_integer (code);
4086 /* i387 supports just limited amount of conditional codes. */
4089 case LTU: case GTU: case LEU: case GEU:
4090 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4093 case ORDERED: case UNORDERED:
4101 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4104 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4106 switch (GET_CODE (op))
4109 /* Modern CPUs have same latency for HImode and SImode multiply,
4110 but 386 and 486 do HImode multiply faster. */
4111 return ix86_tune > PROCESSOR_I486;
4123 /* Nearly general operand, but accept any const_double, since we wish
4124 to be able to drop them into memory rather than have them get pulled
4128 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4130 if (mode != VOIDmode && mode != GET_MODE (op))
4132 if (GET_CODE (op) == CONST_DOUBLE)
4134 return general_operand (op, mode);
4137 /* Match an SI or HImode register for a zero_extract. */
4140 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4143 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4144 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4147 if (!register_operand (op, VOIDmode))
4150 /* Be careful to accept only registers having upper parts. */
4151 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4152 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4155 /* Return 1 if this is a valid binary floating-point operation.
4156 OP is the expression matched, and MODE is its mode. */
4159 binary_fp_operator (rtx op, enum machine_mode mode)
4161 if (mode != VOIDmode && mode != GET_MODE (op))
4164 switch (GET_CODE (op))
4170 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4178 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4180 return GET_CODE (op) == MULT;
4184 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4186 return GET_CODE (op) == DIV;
4190 arith_or_logical_operator (rtx op, enum machine_mode mode)
4192 return ((mode == VOIDmode || GET_MODE (op) == mode)
4193 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4194 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4197 /* Returns 1 if OP is memory operand with a displacement. */
4200 memory_displacement_operand (rtx op, enum machine_mode mode)
4202 struct ix86_address parts;
4204 if (! memory_operand (op, mode))
4207 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4210 return parts.disp != NULL_RTX;
4213 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4214 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4216 ??? It seems likely that this will only work because cmpsi is an
4217 expander, and no actual insns use this. */
4220 cmpsi_operand (rtx op, enum machine_mode mode)
4222 if (nonimmediate_operand (op, mode))
4225 if (GET_CODE (op) == AND
4226 && GET_MODE (op) == SImode
4227 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4228 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4229 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4230 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4231 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4232 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4238 /* Returns 1 if OP is memory operand that can not be represented by the
4242 long_memory_operand (rtx op, enum machine_mode mode)
4244 if (! memory_operand (op, mode))
4247 return memory_address_length (op) != 0;
4250 /* Return nonzero if the rtx is known aligned. */
4253 aligned_operand (rtx op, enum machine_mode mode)
4255 struct ix86_address parts;
4257 if (!general_operand (op, mode))
4260 /* Registers and immediate operands are always "aligned". */
4261 if (GET_CODE (op) != MEM)
4264 /* Don't even try to do any aligned optimizations with volatiles. */
4265 if (MEM_VOLATILE_P (op))
4270 /* Pushes and pops are only valid on the stack pointer. */
4271 if (GET_CODE (op) == PRE_DEC
4272 || GET_CODE (op) == POST_INC)
4275 /* Decode the address. */
4276 if (! ix86_decompose_address (op, &parts))
4279 /* Look for some component that isn't known to be aligned. */
4283 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4288 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4293 if (GET_CODE (parts.disp) != CONST_INT
4294 || (INTVAL (parts.disp) & 3) != 0)
4298 /* Didn't find one -- this must be an aligned address. */
4302 /* Initialize the table of extra 80387 mathematical constants. */
4305 init_ext_80387_constants (void)
4307 static const char * cst[5] =
4309 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4310 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4311 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4312 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4313 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4317 for (i = 0; i < 5; i++)
4319 real_from_string (&ext_80387_constants_table[i], cst[i]);
4320 /* Ensure each constant is rounded to XFmode precision. */
4321 real_convert (&ext_80387_constants_table[i],
4322 XFmode, &ext_80387_constants_table[i]);
4325 ext_80387_constants_init = 1;
4328 /* Return true if the constant is something that can be loaded with
4329 a special instruction. */
4332 standard_80387_constant_p (rtx x)
4334 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4337 if (x == CONST0_RTX (GET_MODE (x)))
4339 if (x == CONST1_RTX (GET_MODE (x)))
4342 /* For XFmode constants, try to find a special 80387 instruction on
4343 those CPUs that benefit from them. */
4344 if (GET_MODE (x) == XFmode
4345 && x86_ext_80387_constants & TUNEMASK)
4350 if (! ext_80387_constants_init)
4351 init_ext_80387_constants ();
4353 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4354 for (i = 0; i < 5; i++)
4355 if (real_identical (&r, &ext_80387_constants_table[i]))
4362 /* Return the opcode of the special instruction to be used to load
4366 standard_80387_constant_opcode (rtx x)
4368 switch (standard_80387_constant_p (x))
4388 /* Return the CONST_DOUBLE representing the 80387 constant that is
4389 loaded by the specified special instruction. The argument IDX
4390 matches the return value from standard_80387_constant_p. */
4393 standard_80387_constant_rtx (int idx)
4397 if (! ext_80387_constants_init)
4398 init_ext_80387_constants ();
4414 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4418 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4421 standard_sse_constant_p (rtx x)
4423 if (x == const0_rtx)
4425 return (x == CONST0_RTX (GET_MODE (x)));
4428 /* Returns 1 if OP contains a symbol reference */
4431 symbolic_reference_mentioned_p (rtx op)
4436 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4439 fmt = GET_RTX_FORMAT (GET_CODE (op));
4440 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4446 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4447 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4451 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4458 /* Return 1 if it is appropriate to emit `ret' instructions in the
4459 body of a function. Do this only if the epilogue is simple, needing a
4460 couple of insns. Prior to reloading, we can't tell how many registers
4461 must be saved, so return 0 then. Return 0 if there is no frame
4462 marker to de-allocate.
4464 If NON_SAVING_SETJMP is defined and true, then it is not possible
4465 for the epilogue to be simple, so return 0. This is a special case
4466 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4467 until final, but jump_optimize may need to know sooner if a
4471 ix86_can_use_return_insn_p (void)
4473 struct ix86_frame frame;
4475 #ifdef NON_SAVING_SETJMP
4476 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4480 if (! reload_completed || frame_pointer_needed)
4483 /* Don't allow more than 32 pop, since that's all we can do
4484 with one instruction. */
4485 if (current_function_pops_args
4486 && current_function_args_size >= 32768)
4489 ix86_compute_frame_layout (&frame);
4490 return frame.to_allocate == 0 && frame.nregs == 0;
4493 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4495 x86_64_sign_extended_value (rtx value)
4497 switch (GET_CODE (value))
4499 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4500 to be at least 32 and this all acceptable constants are
4501 represented as CONST_INT. */
4503 if (HOST_BITS_PER_WIDE_INT == 32)
4507 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4508 return trunc_int_for_mode (val, SImode) == val;
4512 /* For certain code models, the symbolic references are known to fit.
4513 in CM_SMALL_PIC model we know it fits if it is local to the shared
4514 library. Don't count TLS SYMBOL_REFs here, since they should fit
4515 only if inside of UNSPEC handled below. */
4517 /* TLS symbols are not constant. */
4518 if (tls_symbolic_operand (value, Pmode))
4520 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4522 /* For certain code models, the code is near as well. */
4524 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4525 || ix86_cmodel == CM_KERNEL);
4527 /* We also may accept the offsetted memory references in certain special
4530 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4531 switch (XINT (XEXP (value, 0), 1))
4533 case UNSPEC_GOTPCREL:
4535 case UNSPEC_GOTNTPOFF:
4541 if (GET_CODE (XEXP (value, 0)) == PLUS)
4543 rtx op1 = XEXP (XEXP (value, 0), 0);
4544 rtx op2 = XEXP (XEXP (value, 0), 1);
4545 HOST_WIDE_INT offset;
4547 if (ix86_cmodel == CM_LARGE)
4549 if (GET_CODE (op2) != CONST_INT)
4551 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4552 switch (GET_CODE (op1))
4555 /* For CM_SMALL assume that latest object is 16MB before
4556 end of 31bits boundary. We may also accept pretty
4557 large negative constants knowing that all objects are
4558 in the positive half of address space. */
4559 if (ix86_cmodel == CM_SMALL
4560 && offset < 16*1024*1024
4561 && trunc_int_for_mode (offset, SImode) == offset)
4563 /* For CM_KERNEL we know that all object resist in the
4564 negative half of 32bits address space. We may not
4565 accept negative offsets, since they may be just off
4566 and we may accept pretty large positive ones. */
4567 if (ix86_cmodel == CM_KERNEL
4569 && trunc_int_for_mode (offset, SImode) == offset)
4573 /* These conditions are similar to SYMBOL_REF ones, just the
4574 constraints for code models differ. */
4575 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4576 && offset < 16*1024*1024
4577 && trunc_int_for_mode (offset, SImode) == offset)
4579 if (ix86_cmodel == CM_KERNEL
4581 && trunc_int_for_mode (offset, SImode) == offset)
4585 switch (XINT (op1, 1))
4590 && trunc_int_for_mode (offset, SImode) == offset)
4604 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4606 x86_64_zero_extended_value (rtx value)
4608 switch (GET_CODE (value))
4611 if (HOST_BITS_PER_WIDE_INT == 32)
4612 return (GET_MODE (value) == VOIDmode
4613 && !CONST_DOUBLE_HIGH (value));
4617 if (HOST_BITS_PER_WIDE_INT == 32)
4618 return INTVAL (value) >= 0;
4620 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4623 /* For certain code models, the symbolic references are known to fit. */
4625 /* TLS symbols are not constant. */
4626 if (tls_symbolic_operand (value, Pmode))
4628 return ix86_cmodel == CM_SMALL;
4630 /* For certain code models, the code is near as well. */
4632 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4634 /* We also may accept the offsetted memory references in certain special
4637 if (GET_CODE (XEXP (value, 0)) == PLUS)
4639 rtx op1 = XEXP (XEXP (value, 0), 0);
4640 rtx op2 = XEXP (XEXP (value, 0), 1);
4642 if (ix86_cmodel == CM_LARGE)
4644 switch (GET_CODE (op1))
4648 /* For small code model we may accept pretty large positive
4649 offsets, since one bit is available for free. Negative
4650 offsets are limited by the size of NULL pointer area
4651 specified by the ABI. */
4652 if (ix86_cmodel == CM_SMALL
4653 && GET_CODE (op2) == CONST_INT
4654 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4655 && (trunc_int_for_mode (INTVAL (op2), SImode)
4658 /* ??? For the kernel, we may accept adjustment of
4659 -0x10000000, since we know that it will just convert
4660 negative address space to positive, but perhaps this
4661 is not worthwhile. */
4664 /* These conditions are similar to SYMBOL_REF ones, just the
4665 constraints for code models differ. */
4666 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4667 && GET_CODE (op2) == CONST_INT
4668 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4669 && (trunc_int_for_mode (INTVAL (op2), SImode)
4683 /* Value should be nonzero if functions must have frame pointers.
4684 Zero means the frame pointer need not be set up (and parms may
4685 be accessed via the stack pointer) in functions that seem suitable. */
4688 ix86_frame_pointer_required (void)
4690 /* If we accessed previous frames, then the generated code expects
4691 to be able to access the saved ebp value in our frame. */
4692 if (cfun->machine->accesses_prev_frame)
4695 /* Several x86 os'es need a frame pointer for other reasons,
4696 usually pertaining to setjmp. */
4697 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4700 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4701 the frame pointer by default. Turn it back on now if we've not
4702 got a leaf function. */
4703 if (TARGET_OMIT_LEAF_FRAME_POINTER
4704 && (!current_function_is_leaf))
4707 if (current_function_profile)
4713 /* Record that the current function accesses previous call frames. */
4716 ix86_setup_frame_addresses (void)
4718 cfun->machine->accesses_prev_frame = 1;
4721 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4722 # define USE_HIDDEN_LINKONCE 1
4724 # define USE_HIDDEN_LINKONCE 0
4727 static int pic_labels_used;
4729 /* Fills in the label name that should be used for a pc thunk for
4730 the given register. */
4733 get_pc_thunk_name (char name[32], unsigned int regno)
4735 if (USE_HIDDEN_LINKONCE)
4736 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4738 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4742 /* This function generates code for -fpic that loads %ebx with
4743 the return address of the caller and then returns. */
4746 ix86_file_end (void)
4751 for (regno = 0; regno < 8; ++regno)
4755 if (! ((pic_labels_used >> regno) & 1))
4758 get_pc_thunk_name (name, regno);
4760 if (USE_HIDDEN_LINKONCE)
4764 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4766 TREE_PUBLIC (decl) = 1;
4767 TREE_STATIC (decl) = 1;
4768 DECL_ONE_ONLY (decl) = 1;
4770 (*targetm.asm_out.unique_section) (decl, 0);
4771 named_section (decl, NULL, 0);
4773 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4774 fputs ("\t.hidden\t", asm_out_file);
4775 assemble_name (asm_out_file, name);
4776 fputc ('\n', asm_out_file);
4777 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4782 ASM_OUTPUT_LABEL (asm_out_file, name);
4785 xops[0] = gen_rtx_REG (SImode, regno);
4786 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4787 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4788 output_asm_insn ("ret", xops);
4791 if (NEED_INDICATE_EXEC_STACK)
4792 file_end_indicate_exec_stack ();
4795 /* Emit code for the SET_GOT patterns. */
4798 output_set_got (rtx dest)
4803 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4805 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4807 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4810 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4812 output_asm_insn ("call\t%a2", xops);
4815 /* Output the "canonical" label name ("Lxx$pb") here too. This
4816 is what will be referred to by the Mach-O PIC subsystem. */
4817 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4819 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4820 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4823 output_asm_insn ("pop{l}\t%0", xops);
4828 get_pc_thunk_name (name, REGNO (dest));
4829 pic_labels_used |= 1 << REGNO (dest);
4831 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4832 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4833 output_asm_insn ("call\t%X2", xops);
4836 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4837 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4838 else if (!TARGET_MACHO)
4839 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4844 /* Generate an "push" pattern for input ARG. */
4849 return gen_rtx_SET (VOIDmode,
4851 gen_rtx_PRE_DEC (Pmode,
4852 stack_pointer_rtx)),
4856 /* Return >= 0 if there is an unused call-clobbered register available
4857 for the entire function. */
4860 ix86_select_alt_pic_regnum (void)
4862 if (current_function_is_leaf && !current_function_profile)
4865 for (i = 2; i >= 0; --i)
4866 if (!regs_ever_live[i])
4870 return INVALID_REGNUM;
4873 /* Return 1 if we need to save REGNO. */
4875 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4877 if (pic_offset_table_rtx
4878 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4879 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4880 || current_function_profile
4881 || current_function_calls_eh_return
4882 || current_function_uses_const_pool))
4884 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4889 if (current_function_calls_eh_return && maybe_eh_return)
4894 unsigned test = EH_RETURN_DATA_REGNO (i);
4895 if (test == INVALID_REGNUM)
4902 return (regs_ever_live[regno]
4903 && !call_used_regs[regno]
4904 && !fixed_regs[regno]
4905 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4908 /* Return number of registers to be saved on the stack. */
4911 ix86_nsaved_regs (void)
4916 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4917 if (ix86_save_reg (regno, true))
4922 /* Return the offset between two registers, one to be eliminated, and the other
4923 its replacement, at the start of a routine. */
4926 ix86_initial_elimination_offset (int from, int to)
4928 struct ix86_frame frame;
4929 ix86_compute_frame_layout (&frame);
4931 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4932 return frame.hard_frame_pointer_offset;
4933 else if (from == FRAME_POINTER_REGNUM
4934 && to == HARD_FRAME_POINTER_REGNUM)
4935 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4938 if (to != STACK_POINTER_REGNUM)
4940 else if (from == ARG_POINTER_REGNUM)
4941 return frame.stack_pointer_offset;
4942 else if (from != FRAME_POINTER_REGNUM)
4945 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4949 /* Fill structure ix86_frame about frame of currently computed function. */
4952 ix86_compute_frame_layout (struct ix86_frame *frame)
4954 HOST_WIDE_INT total_size;
4955 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4956 HOST_WIDE_INT offset;
4957 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4958 HOST_WIDE_INT size = get_frame_size ();
4960 frame->nregs = ix86_nsaved_regs ();
4963 /* During reload iteration the amount of registers saved can change.
4964 Recompute the value as needed. Do not recompute when amount of registers
4965 didn't change as reload does mutiple calls to the function and does not
4966 expect the decision to change within single iteration. */
4968 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4970 int count = frame->nregs;
4972 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4973 /* The fast prologue uses move instead of push to save registers. This
4974 is significantly longer, but also executes faster as modern hardware
4975 can execute the moves in parallel, but can't do that for push/pop.
4977 Be careful about choosing what prologue to emit: When function takes
4978 many instructions to execute we may use slow version as well as in
4979 case function is known to be outside hot spot (this is known with
4980 feedback only). Weight the size of function by number of registers
4981 to save as it is cheap to use one or two push instructions but very
4982 slow to use many of them. */
4984 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4985 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4986 || (flag_branch_probabilities
4987 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4988 cfun->machine->use_fast_prologue_epilogue = false;
4990 cfun->machine->use_fast_prologue_epilogue
4991 = !expensive_function_p (count);
4993 if (TARGET_PROLOGUE_USING_MOVE
4994 && cfun->machine->use_fast_prologue_epilogue)
4995 frame->save_regs_using_mov = true;
4997 frame->save_regs_using_mov = false;
5000 /* Skip return address and saved base pointer. */
5001 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5003 frame->hard_frame_pointer_offset = offset;
5005 /* Do some sanity checking of stack_alignment_needed and
5006 preferred_alignment, since i386 port is the only using those features
5007 that may break easily. */
5009 if (size && !stack_alignment_needed)
5011 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5013 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5015 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5018 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5019 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5021 /* Register save area */
5022 offset += frame->nregs * UNITS_PER_WORD;
5025 if (ix86_save_varrargs_registers)
5027 offset += X86_64_VARARGS_SIZE;
5028 frame->va_arg_size = X86_64_VARARGS_SIZE;
5031 frame->va_arg_size = 0;
5033 /* Align start of frame for local function. */
5034 frame->padding1 = ((offset + stack_alignment_needed - 1)
5035 & -stack_alignment_needed) - offset;
5037 offset += frame->padding1;
5039 /* Frame pointer points here. */
5040 frame->frame_pointer_offset = offset;
5044 /* Add outgoing arguments area. Can be skipped if we eliminated
5045 all the function calls as dead code.
5046 Skipping is however impossible when function calls alloca. Alloca
5047 expander assumes that last current_function_outgoing_args_size
5048 of stack frame are unused. */
5049 if (ACCUMULATE_OUTGOING_ARGS
5050 && (!current_function_is_leaf || current_function_calls_alloca))
5052 offset += current_function_outgoing_args_size;
5053 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5056 frame->outgoing_arguments_size = 0;
5058 /* Align stack boundary. Only needed if we're calling another function
5060 if (!current_function_is_leaf || current_function_calls_alloca)
5061 frame->padding2 = ((offset + preferred_alignment - 1)
5062 & -preferred_alignment) - offset;
5064 frame->padding2 = 0;
5066 offset += frame->padding2;
5068 /* We've reached end of stack frame. */
5069 frame->stack_pointer_offset = offset;
5071 /* Size prologue needs to allocate. */
5072 frame->to_allocate =
5073 (size + frame->padding1 + frame->padding2
5074 + frame->outgoing_arguments_size + frame->va_arg_size);
5076 if ((!frame->to_allocate && frame->nregs <= 1)
5077 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5078 frame->save_regs_using_mov = false;
5080 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5081 && current_function_is_leaf)
5083 frame->red_zone_size = frame->to_allocate;
5084 if (frame->save_regs_using_mov)
5085 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5086 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5087 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5090 frame->red_zone_size = 0;
5091 frame->to_allocate -= frame->red_zone_size;
5092 frame->stack_pointer_offset -= frame->red_zone_size;
5094 fprintf (stderr, "nregs: %i\n", frame->nregs);
5095 fprintf (stderr, "size: %i\n", size);
5096 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5097 fprintf (stderr, "padding1: %i\n", frame->padding1);
5098 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5099 fprintf (stderr, "padding2: %i\n", frame->padding2);
5100 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5101 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5102 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5103 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5104 frame->hard_frame_pointer_offset);
5105 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5109 /* Emit code to save registers in the prologue. */
5112 ix86_emit_save_regs (void)
5117 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5118 if (ix86_save_reg (regno, true))
5120 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5121 RTX_FRAME_RELATED_P (insn) = 1;
5125 /* Emit code to save registers using MOV insns. First register
5126 is restored from POINTER + OFFSET. */
5128 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5133 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5134 if (ix86_save_reg (regno, true))
5136 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5138 gen_rtx_REG (Pmode, regno));
5139 RTX_FRAME_RELATED_P (insn) = 1;
5140 offset += UNITS_PER_WORD;
5144 /* Expand prologue or epilogue stack adjustment.
5145 The pattern exist to put a dependency on all ebp-based memory accesses.
5146 STYLE should be negative if instructions should be marked as frame related,
5147 zero if %r11 register is live and cannot be freely used and positive
5151 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5156 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5157 else if (x86_64_immediate_operand (offset, DImode))
5158 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5162 /* r11 is used by indirect sibcall return as well, set before the
5163 epilogue and used after the epilogue. ATM indirect sibcall
5164 shouldn't be used together with huge frame sizes in one
5165 function because of the frame_size check in sibcall.c. */
5168 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5169 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5171 RTX_FRAME_RELATED_P (insn) = 1;
5172 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5176 RTX_FRAME_RELATED_P (insn) = 1;
5179 /* Expand the prologue into a bunch of separate insns. */
5182 ix86_expand_prologue (void)
5186 struct ix86_frame frame;
5187 HOST_WIDE_INT allocate;
5189 ix86_compute_frame_layout (&frame);
5191 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5192 slower on all targets. Also sdb doesn't like it. */
5194 if (frame_pointer_needed)
5196 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5197 RTX_FRAME_RELATED_P (insn) = 1;
5199 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5200 RTX_FRAME_RELATED_P (insn) = 1;
5203 allocate = frame.to_allocate;
5205 if (!frame.save_regs_using_mov)
5206 ix86_emit_save_regs ();
5208 allocate += frame.nregs * UNITS_PER_WORD;
5210 /* When using red zone we may start register saving before allocating
5211 the stack frame saving one cycle of the prologue. */
5212 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5213 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5214 : stack_pointer_rtx,
5215 -frame.nregs * UNITS_PER_WORD);
5219 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5220 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5221 GEN_INT (-allocate), -1);
5224 /* Only valid for Win32. */
5225 rtx eax = gen_rtx_REG (SImode, 0);
5226 bool eax_live = ix86_eax_live_at_start_p ();
5233 emit_insn (gen_push (eax));
5237 insn = emit_move_insn (eax, GEN_INT (allocate));
5238 RTX_FRAME_RELATED_P (insn) = 1;
5240 insn = emit_insn (gen_allocate_stack_worker (eax));
5241 RTX_FRAME_RELATED_P (insn) = 1;
5245 rtx t = plus_constant (stack_pointer_rtx, allocate);
5246 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5250 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5252 if (!frame_pointer_needed || !frame.to_allocate)
5253 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5255 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5256 -frame.nregs * UNITS_PER_WORD);
5259 pic_reg_used = false;
5260 if (pic_offset_table_rtx
5261 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5262 || current_function_profile))
5264 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5266 if (alt_pic_reg_used != INVALID_REGNUM)
5267 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5269 pic_reg_used = true;
5274 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5276 /* Even with accurate pre-reload life analysis, we can wind up
5277 deleting all references to the pic register after reload.
5278 Consider if cross-jumping unifies two sides of a branch
5279 controlled by a comparison vs the only read from a global.
5280 In which case, allow the set_got to be deleted, though we're
5281 too late to do anything about the ebx save in the prologue. */
5282 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5285 /* Prevent function calls from be scheduled before the call to mcount.
5286 In the pic_reg_used case, make sure that the got load isn't deleted. */
5287 if (current_function_profile)
5288 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5291 /* Emit code to restore saved registers using MOV insns. First register
5292 is restored from POINTER + OFFSET. */
5294 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5295 int maybe_eh_return)
5298 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5300 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5301 if (ix86_save_reg (regno, maybe_eh_return))
5303 /* Ensure that adjust_address won't be forced to produce pointer
5304 out of range allowed by x86-64 instruction set. */
5305 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5309 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5310 emit_move_insn (r11, GEN_INT (offset));
5311 emit_insn (gen_adddi3 (r11, r11, pointer));
5312 base_address = gen_rtx_MEM (Pmode, r11);
5315 emit_move_insn (gen_rtx_REG (Pmode, regno),
5316 adjust_address (base_address, Pmode, offset));
5317 offset += UNITS_PER_WORD;
5321 /* Restore function stack, frame, and registers. */
5324 ix86_expand_epilogue (int style)
5327 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5328 struct ix86_frame frame;
5329 HOST_WIDE_INT offset;
5331 ix86_compute_frame_layout (&frame);
5333 /* Calculate start of saved registers relative to ebp. Special care
5334 must be taken for the normal return case of a function using
5335 eh_return: the eax and edx registers are marked as saved, but not
5336 restored along this path. */
5337 offset = frame.nregs;
5338 if (current_function_calls_eh_return && style != 2)
5340 offset *= -UNITS_PER_WORD;
5342 /* If we're only restoring one register and sp is not valid then
5343 using a move instruction to restore the register since it's
5344 less work than reloading sp and popping the register.
5346 The default code result in stack adjustment using add/lea instruction,
5347 while this code results in LEAVE instruction (or discrete equivalent),
5348 so it is profitable in some other cases as well. Especially when there
5349 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5350 and there is exactly one register to pop. This heuristic may need some
5351 tuning in future. */
5352 if ((!sp_valid && frame.nregs <= 1)
5353 || (TARGET_EPILOGUE_USING_MOVE
5354 && cfun->machine->use_fast_prologue_epilogue
5355 && (frame.nregs > 1 || frame.to_allocate))
5356 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5357 || (frame_pointer_needed && TARGET_USE_LEAVE
5358 && cfun->machine->use_fast_prologue_epilogue
5359 && frame.nregs == 1)
5360 || current_function_calls_eh_return)
5362 /* Restore registers. We can use ebp or esp to address the memory
5363 locations. If both are available, default to ebp, since offsets
5364 are known to be small. Only exception is esp pointing directly to the
5365 end of block of saved registers, where we may simplify addressing
5368 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5369 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5370 frame.to_allocate, style == 2);
5372 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5373 offset, style == 2);
5375 /* eh_return epilogues need %ecx added to the stack pointer. */
5378 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5380 if (frame_pointer_needed)
5382 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5383 tmp = plus_constant (tmp, UNITS_PER_WORD);
5384 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5386 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5387 emit_move_insn (hard_frame_pointer_rtx, tmp);
5389 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5394 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5395 tmp = plus_constant (tmp, (frame.to_allocate
5396 + frame.nregs * UNITS_PER_WORD));
5397 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5400 else if (!frame_pointer_needed)
5401 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5402 GEN_INT (frame.to_allocate
5403 + frame.nregs * UNITS_PER_WORD),
5405 /* If not an i386, mov & pop is faster than "leave". */
5406 else if (TARGET_USE_LEAVE || optimize_size
5407 || !cfun->machine->use_fast_prologue_epilogue)
5408 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5411 pro_epilogue_adjust_stack (stack_pointer_rtx,
5412 hard_frame_pointer_rtx,
5415 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5417 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5422 /* First step is to deallocate the stack frame so that we can
5423 pop the registers. */
5426 if (!frame_pointer_needed)
5428 pro_epilogue_adjust_stack (stack_pointer_rtx,
5429 hard_frame_pointer_rtx,
5430 GEN_INT (offset), style);
5432 else if (frame.to_allocate)
5433 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5434 GEN_INT (frame.to_allocate), style);
5436 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5437 if (ix86_save_reg (regno, false))
5440 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5442 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5444 if (frame_pointer_needed)
5446 /* Leave results in shorter dependency chains on CPUs that are
5447 able to grok it fast. */
5448 if (TARGET_USE_LEAVE)
5449 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5450 else if (TARGET_64BIT)
5451 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5453 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5457 /* Sibcall epilogues don't want a return instruction. */
5461 if (current_function_pops_args && current_function_args_size)
5463 rtx popc = GEN_INT (current_function_pops_args);
5465 /* i386 can only pop 64K bytes. If asked to pop more, pop
5466 return address, do explicit add, and jump indirectly to the
5469 if (current_function_pops_args >= 65536)
5471 rtx ecx = gen_rtx_REG (SImode, 2);
5473 /* There is no "pascal" calling convention in 64bit ABI. */
5477 emit_insn (gen_popsi1 (ecx));
5478 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5479 emit_jump_insn (gen_return_indirect_internal (ecx));
5482 emit_jump_insn (gen_return_pop_internal (popc));
5485 emit_jump_insn (gen_return_internal ());
5488 /* Reset from the function's potential modifications. */
5491 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5492 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5494 if (pic_offset_table_rtx)
5495 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5498 /* Extract the parts of an RTL expression that is a valid memory address
5499 for an instruction. Return 0 if the structure of the address is
5500 grossly off. Return -1 if the address contains ASHIFT, so it is not
5501 strictly valid, but still used for computing length of lea instruction. */
5504 ix86_decompose_address (rtx addr, struct ix86_address *out)
5506 rtx base = NULL_RTX;
5507 rtx index = NULL_RTX;
5508 rtx disp = NULL_RTX;
5509 HOST_WIDE_INT scale = 1;
5510 rtx scale_rtx = NULL_RTX;
5512 enum ix86_address_seg seg = SEG_DEFAULT;
5514 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5516 else if (GET_CODE (addr) == PLUS)
5526 addends[n++] = XEXP (op, 1);
5529 while (GET_CODE (op) == PLUS);
5534 for (i = n; i >= 0; --i)
5537 switch (GET_CODE (op))
5542 index = XEXP (op, 0);
5543 scale_rtx = XEXP (op, 1);
5547 if (XINT (op, 1) == UNSPEC_TP
5548 && TARGET_TLS_DIRECT_SEG_REFS
5549 && seg == SEG_DEFAULT)
5550 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5579 else if (GET_CODE (addr) == MULT)
5581 index = XEXP (addr, 0); /* index*scale */
5582 scale_rtx = XEXP (addr, 1);
5584 else if (GET_CODE (addr) == ASHIFT)
5588 /* We're called for lea too, which implements ashift on occasion. */
5589 index = XEXP (addr, 0);
5590 tmp = XEXP (addr, 1);
5591 if (GET_CODE (tmp) != CONST_INT)
5593 scale = INTVAL (tmp);
5594 if ((unsigned HOST_WIDE_INT) scale > 3)
5600 disp = addr; /* displacement */
5602 /* Extract the integral value of scale. */
5605 if (GET_CODE (scale_rtx) != CONST_INT)
5607 scale = INTVAL (scale_rtx);
5610 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5611 if (base && index && scale == 1
5612 && (index == arg_pointer_rtx
5613 || index == frame_pointer_rtx
5614 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5621 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5622 if ((base == hard_frame_pointer_rtx
5623 || base == frame_pointer_rtx
5624 || base == arg_pointer_rtx) && !disp)
5627 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5628 Avoid this by transforming to [%esi+0]. */
5629 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5630 && base && !index && !disp
5632 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5635 /* Special case: encode reg+reg instead of reg*2. */
5636 if (!base && index && scale && scale == 2)
5637 base = index, scale = 1;
5639 /* Special case: scaling cannot be encoded without base or displacement. */
5640 if (!base && !disp && index && scale != 1)
5652 /* Return cost of the memory address x.
5653 For i386, it is better to use a complex address than let gcc copy
5654 the address into a reg and make a new pseudo. But not if the address
5655 requires to two regs - that would mean more pseudos with longer
5658 ix86_address_cost (rtx x)
5660 struct ix86_address parts;
5663 if (!ix86_decompose_address (x, &parts))
5666 /* More complex memory references are better. */
5667 if (parts.disp && parts.disp != const0_rtx)
5669 if (parts.seg != SEG_DEFAULT)
5672 /* Attempt to minimize number of registers in the address. */
5674 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5676 && (!REG_P (parts.index)
5677 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5681 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5683 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5684 && parts.base != parts.index)
5687 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5688 since it's predecode logic can't detect the length of instructions
5689 and it degenerates to vector decoded. Increase cost of such
5690 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5691 to split such addresses or even refuse such addresses at all.
5693 Following addressing modes are affected:
5698 The first and last case may be avoidable by explicitly coding the zero in
5699 memory address, but I don't have AMD-K6 machine handy to check this
5703 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5704 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5705 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5711 /* If X is a machine specific address (i.e. a symbol or label being
5712 referenced as a displacement from the GOT implemented using an
5713 UNSPEC), then return the base term. Otherwise return X. */
5716 ix86_find_base_term (rtx x)
5722 if (GET_CODE (x) != CONST)
5725 if (GET_CODE (term) == PLUS
5726 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5727 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5728 term = XEXP (term, 0);
5729 if (GET_CODE (term) != UNSPEC
5730 || XINT (term, 1) != UNSPEC_GOTPCREL)
5733 term = XVECEXP (term, 0, 0);
5735 if (GET_CODE (term) != SYMBOL_REF
5736 && GET_CODE (term) != LABEL_REF)
5742 term = ix86_delegitimize_address (x);
5744 if (GET_CODE (term) != SYMBOL_REF
5745 && GET_CODE (term) != LABEL_REF)
5751 /* Determine if a given RTX is a valid constant. We already know this
5752 satisfies CONSTANT_P. */
5755 legitimate_constant_p (rtx x)
5759 switch (GET_CODE (x))
5762 /* TLS symbols are not constant. */
5763 if (tls_symbolic_operand (x, Pmode))
5768 inner = XEXP (x, 0);
5770 /* Offsets of TLS symbols are never valid.
5771 Discourage CSE from creating them. */
5772 if (GET_CODE (inner) == PLUS
5773 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5776 if (GET_CODE (inner) == PLUS)
5778 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5780 inner = XEXP (inner, 0);
5783 /* Only some unspecs are valid as "constants". */
5784 if (GET_CODE (inner) == UNSPEC)
5785 switch (XINT (inner, 1))
5789 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5791 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5801 /* Otherwise we handle everything else in the move patterns. */
5805 /* Determine if it's legal to put X into the constant pool. This
5806 is not possible for the address of thread-local symbols, which
5807 is checked above. */
5810 ix86_cannot_force_const_mem (rtx x)
5812 return !legitimate_constant_p (x);
5815 /* Determine if a given RTX is a valid constant address. */
5818 constant_address_p (rtx x)
5820 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5823 /* Nonzero if the constant value X is a legitimate general operand
5824 when generating PIC code. It is given that flag_pic is on and
5825 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5828 legitimate_pic_operand_p (rtx x)
5832 switch (GET_CODE (x))
5835 inner = XEXP (x, 0);
5837 /* Only some unspecs are valid as "constants". */
5838 if (GET_CODE (inner) == UNSPEC)
5839 switch (XINT (inner, 1))
5842 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5850 return legitimate_pic_address_disp_p (x);
5857 /* Determine if a given CONST RTX is a valid memory displacement
5861 legitimate_pic_address_disp_p (rtx disp)
5865 /* In 64bit mode we can allow direct addresses of symbols and labels
5866 when they are not dynamic symbols. */
5869 /* TLS references should always be enclosed in UNSPEC. */
5870 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5872 if (GET_CODE (disp) == SYMBOL_REF
5873 && ix86_cmodel == CM_SMALL_PIC
5874 && SYMBOL_REF_LOCAL_P (disp))
5876 if (GET_CODE (disp) == LABEL_REF)
5878 if (GET_CODE (disp) == CONST
5879 && GET_CODE (XEXP (disp, 0)) == PLUS)
5881 rtx op0 = XEXP (XEXP (disp, 0), 0);
5882 rtx op1 = XEXP (XEXP (disp, 0), 1);
5884 /* TLS references should always be enclosed in UNSPEC. */
5885 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5887 if (((GET_CODE (op0) == SYMBOL_REF
5888 && ix86_cmodel == CM_SMALL_PIC
5889 && SYMBOL_REF_LOCAL_P (op0))
5890 || GET_CODE (op0) == LABEL_REF)
5891 && GET_CODE (op1) == CONST_INT
5892 && INTVAL (op1) < 16*1024*1024
5893 && INTVAL (op1) >= -16*1024*1024)
5897 if (GET_CODE (disp) != CONST)
5899 disp = XEXP (disp, 0);
5903 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5904 of GOT tables. We should not need these anyway. */
5905 if (GET_CODE (disp) != UNSPEC
5906 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5909 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5910 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5916 if (GET_CODE (disp) == PLUS)
5918 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5920 disp = XEXP (disp, 0);
5924 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5925 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5927 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5928 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5929 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5931 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5932 if (! strcmp (sym_name, "<pic base>"))
5937 if (GET_CODE (disp) != UNSPEC)
5940 switch (XINT (disp, 1))
5945 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5947 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5948 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5949 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5951 case UNSPEC_GOTTPOFF:
5952 case UNSPEC_GOTNTPOFF:
5953 case UNSPEC_INDNTPOFF:
5956 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5958 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5960 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5966 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5967 memory address for an instruction. The MODE argument is the machine mode
5968 for the MEM expression that wants to use this address.
5970 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5971 convert common non-canonical forms to canonical form so that they will
5975 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5977 struct ix86_address parts;
5978 rtx base, index, disp;
5979 HOST_WIDE_INT scale;
5980 const char *reason = NULL;
5981 rtx reason_rtx = NULL_RTX;
5983 if (TARGET_DEBUG_ADDR)
5986 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5987 GET_MODE_NAME (mode), strict);
5991 if (ix86_decompose_address (addr, &parts) <= 0)
5993 reason = "decomposition failed";
5998 index = parts.index;
6000 scale = parts.scale;
6002 /* Validate base register.
6004 Don't allow SUBREG's here, it can lead to spill failures when the base
6005 is one word out of a two word structure, which is represented internally
6012 if (GET_CODE (base) != REG)
6014 reason = "base is not a register";
6018 if (GET_MODE (base) != Pmode)
6020 reason = "base is not in Pmode";
6024 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6025 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6027 reason = "base is not valid";
6032 /* Validate index register.
6034 Don't allow SUBREG's here, it can lead to spill failures when the index
6035 is one word out of a two word structure, which is represented internally
6042 if (GET_CODE (index) != REG)
6044 reason = "index is not a register";
6048 if (GET_MODE (index) != Pmode)
6050 reason = "index is not in Pmode";
6054 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6055 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6057 reason = "index is not valid";
6062 /* Validate scale factor. */
6065 reason_rtx = GEN_INT (scale);
6068 reason = "scale without index";
6072 if (scale != 2 && scale != 4 && scale != 8)
6074 reason = "scale is not a valid multiplier";
6079 /* Validate displacement. */
6084 if (GET_CODE (disp) == CONST
6085 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6086 switch (XINT (XEXP (disp, 0), 1))
6090 case UNSPEC_GOTPCREL:
6093 goto is_legitimate_pic;
6095 case UNSPEC_GOTTPOFF:
6096 case UNSPEC_GOTNTPOFF:
6097 case UNSPEC_INDNTPOFF:
6103 reason = "invalid address unspec";
6107 else if (flag_pic && (SYMBOLIC_CONST (disp)
6109 && !machopic_operand_p (disp)
6114 if (TARGET_64BIT && (index || base))
6116 /* foo@dtpoff(%rX) is ok. */
6117 if (GET_CODE (disp) != CONST
6118 || GET_CODE (XEXP (disp, 0)) != PLUS
6119 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6120 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6121 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6122 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6124 reason = "non-constant pic memory reference";
6128 else if (! legitimate_pic_address_disp_p (disp))
6130 reason = "displacement is an invalid pic construct";
6134 /* This code used to verify that a symbolic pic displacement
6135 includes the pic_offset_table_rtx register.
6137 While this is good idea, unfortunately these constructs may
6138 be created by "adds using lea" optimization for incorrect
6147 This code is nonsensical, but results in addressing
6148 GOT table with pic_offset_table_rtx base. We can't
6149 just refuse it easily, since it gets matched by
6150 "addsi3" pattern, that later gets split to lea in the
6151 case output register differs from input. While this
6152 can be handled by separate addsi pattern for this case
6153 that never results in lea, this seems to be easier and
6154 correct fix for crash to disable this test. */
6156 else if (GET_CODE (disp) != LABEL_REF
6157 && GET_CODE (disp) != CONST_INT
6158 && (GET_CODE (disp) != CONST
6159 || !legitimate_constant_p (disp))
6160 && (GET_CODE (disp) != SYMBOL_REF
6161 || !legitimate_constant_p (disp)))
6163 reason = "displacement is not constant";
6166 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6168 reason = "displacement is out of range";
6173 /* Everything looks valid. */
6174 if (TARGET_DEBUG_ADDR)
6175 fprintf (stderr, "Success.\n");
6179 if (TARGET_DEBUG_ADDR)
6181 fprintf (stderr, "Error: %s\n", reason);
6182 debug_rtx (reason_rtx);
6187 /* Return an unique alias set for the GOT. */
6189 static HOST_WIDE_INT
6190 ix86_GOT_alias_set (void)
6192 static HOST_WIDE_INT set = -1;
6194 set = new_alias_set ();
6198 /* Return a legitimate reference for ORIG (an address) using the
6199 register REG. If REG is 0, a new pseudo is generated.
6201 There are two types of references that must be handled:
6203 1. Global data references must load the address from the GOT, via
6204 the PIC reg. An insn is emitted to do this load, and the reg is
6207 2. Static data references, constant pool addresses, and code labels
6208 compute the address as an offset from the GOT, whose base is in
6209 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6210 differentiate them from global data objects. The returned
6211 address is the PIC reg + an unspec constant.
6213 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6214 reg also appears in the address. */
6217 legitimize_pic_address (rtx orig, rtx reg)
6225 reg = gen_reg_rtx (Pmode);
6226 /* Use the generic Mach-O PIC machinery. */
6227 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6230 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6232 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6234 /* This symbol may be referenced via a displacement from the PIC
6235 base address (@GOTOFF). */
6237 if (reload_in_progress)
6238 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6239 if (GET_CODE (addr) == CONST)
6240 addr = XEXP (addr, 0);
6241 if (GET_CODE (addr) == PLUS)
6243 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6244 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6247 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6248 new = gen_rtx_CONST (Pmode, new);
6249 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6253 emit_move_insn (reg, new);
6257 else if (GET_CODE (addr) == SYMBOL_REF)
6261 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6262 new = gen_rtx_CONST (Pmode, new);
6263 new = gen_rtx_MEM (Pmode, new);
6264 RTX_UNCHANGING_P (new) = 1;
6265 set_mem_alias_set (new, ix86_GOT_alias_set ());
6268 reg = gen_reg_rtx (Pmode);
6269 /* Use directly gen_movsi, otherwise the address is loaded
6270 into register for CSE. We don't want to CSE this addresses,
6271 instead we CSE addresses from the GOT table, so skip this. */
6272 emit_insn (gen_movsi (reg, new));
6277 /* This symbol must be referenced via a load from the
6278 Global Offset Table (@GOT). */
6280 if (reload_in_progress)
6281 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6282 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6283 new = gen_rtx_CONST (Pmode, new);
6284 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6285 new = gen_rtx_MEM (Pmode, new);
6286 RTX_UNCHANGING_P (new) = 1;
6287 set_mem_alias_set (new, ix86_GOT_alias_set ());
6290 reg = gen_reg_rtx (Pmode);
6291 emit_move_insn (reg, new);
6297 if (GET_CODE (addr) == CONST)
6299 addr = XEXP (addr, 0);
6301 /* We must match stuff we generate before. Assume the only
6302 unspecs that can get here are ours. Not that we could do
6303 anything with them anyway.... */
6304 if (GET_CODE (addr) == UNSPEC
6305 || (GET_CODE (addr) == PLUS
6306 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6308 if (GET_CODE (addr) != PLUS)
6311 if (GET_CODE (addr) == PLUS)
6313 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6315 /* Check first to see if this is a constant offset from a @GOTOFF
6316 symbol reference. */
6317 if (local_symbolic_operand (op0, Pmode)
6318 && GET_CODE (op1) == CONST_INT)
6322 if (reload_in_progress)
6323 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6324 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6326 new = gen_rtx_PLUS (Pmode, new, op1);
6327 new = gen_rtx_CONST (Pmode, new);
6328 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6332 emit_move_insn (reg, new);
6338 if (INTVAL (op1) < -16*1024*1024
6339 || INTVAL (op1) >= 16*1024*1024)
6340 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6345 base = legitimize_pic_address (XEXP (addr, 0), reg);
6346 new = legitimize_pic_address (XEXP (addr, 1),
6347 base == reg ? NULL_RTX : reg);
6349 if (GET_CODE (new) == CONST_INT)
6350 new = plus_constant (base, INTVAL (new));
6353 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6355 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6356 new = XEXP (new, 1);
6358 new = gen_rtx_PLUS (Pmode, base, new);
6366 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6369 get_thread_pointer (int to_reg)
6373 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6377 reg = gen_reg_rtx (Pmode);
6378 insn = gen_rtx_SET (VOIDmode, reg, tp);
6379 insn = emit_insn (insn);
6384 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6385 false if we expect this to be used for a memory address and true if
6386 we expect to load the address into a register. */
6389 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6391 rtx dest, base, off, pic;
6396 case TLS_MODEL_GLOBAL_DYNAMIC:
6397 dest = gen_reg_rtx (Pmode);
6400 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6403 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6404 insns = get_insns ();
6407 emit_libcall_block (insns, dest, rax, x);
6410 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6413 case TLS_MODEL_LOCAL_DYNAMIC:
6414 base = gen_reg_rtx (Pmode);
6417 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6420 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6421 insns = get_insns ();
6424 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6425 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6426 emit_libcall_block (insns, base, rax, note);
6429 emit_insn (gen_tls_local_dynamic_base_32 (base));
6431 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6432 off = gen_rtx_CONST (Pmode, off);
6434 return gen_rtx_PLUS (Pmode, base, off);
6436 case TLS_MODEL_INITIAL_EXEC:
6440 type = UNSPEC_GOTNTPOFF;
6444 if (reload_in_progress)
6445 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6446 pic = pic_offset_table_rtx;
6447 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6449 else if (!TARGET_GNU_TLS)
6451 pic = gen_reg_rtx (Pmode);
6452 emit_insn (gen_set_got (pic));
6453 type = UNSPEC_GOTTPOFF;
6458 type = UNSPEC_INDNTPOFF;
6461 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6462 off = gen_rtx_CONST (Pmode, off);
6464 off = gen_rtx_PLUS (Pmode, pic, off);
6465 off = gen_rtx_MEM (Pmode, off);
6466 RTX_UNCHANGING_P (off) = 1;
6467 set_mem_alias_set (off, ix86_GOT_alias_set ());
6469 if (TARGET_64BIT || TARGET_GNU_TLS)
6471 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6472 off = force_reg (Pmode, off);
6473 return gen_rtx_PLUS (Pmode, base, off);
6477 base = get_thread_pointer (true);
6478 dest = gen_reg_rtx (Pmode);
6479 emit_insn (gen_subsi3 (dest, base, off));
6483 case TLS_MODEL_LOCAL_EXEC:
6484 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6485 (TARGET_64BIT || TARGET_GNU_TLS)
6486 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6487 off = gen_rtx_CONST (Pmode, off);
6489 if (TARGET_64BIT || TARGET_GNU_TLS)
6491 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6492 return gen_rtx_PLUS (Pmode, base, off);
6496 base = get_thread_pointer (true);
6497 dest = gen_reg_rtx (Pmode);
6498 emit_insn (gen_subsi3 (dest, base, off));
6509 /* Try machine-dependent ways of modifying an illegitimate address
6510 to be legitimate. If we find one, return the new, valid address.
6511 This macro is used in only one place: `memory_address' in explow.c.
6513 OLDX is the address as it was before break_out_memory_refs was called.
6514 In some cases it is useful to look at this to decide what needs to be done.
6516 MODE and WIN are passed so that this macro can use
6517 GO_IF_LEGITIMATE_ADDRESS.
6519 It is always safe for this macro to do nothing. It exists to recognize
6520 opportunities to optimize the output.
6522 For the 80386, we handle X+REG by loading X into a register R and
6523 using R+REG. R will go in a general reg and indexing will be used.
6524 However, if REG is a broken-out memory address or multiplication,
6525 nothing needs to be done because REG can certainly go in a general reg.
6527 When -fpic is used, special handling is needed for symbolic references.
6528 See comments by legitimize_pic_address in i386.c for details. */
6531 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6536 if (TARGET_DEBUG_ADDR)
6538 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6539 GET_MODE_NAME (mode));
6543 log = tls_symbolic_operand (x, mode);
6545 return legitimize_tls_address (x, log, false);
6547 if (flag_pic && SYMBOLIC_CONST (x))
6548 return legitimize_pic_address (x, 0);
6550 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6551 if (GET_CODE (x) == ASHIFT
6552 && GET_CODE (XEXP (x, 1)) == CONST_INT
6553 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6556 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6557 GEN_INT (1 << log));
6560 if (GET_CODE (x) == PLUS)
6562 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6564 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6565 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6566 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6569 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6570 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6571 GEN_INT (1 << log));
6574 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6575 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6576 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6579 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6580 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6581 GEN_INT (1 << log));
6584 /* Put multiply first if it isn't already. */
6585 if (GET_CODE (XEXP (x, 1)) == MULT)
6587 rtx tmp = XEXP (x, 0);
6588 XEXP (x, 0) = XEXP (x, 1);
6593 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6594 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6595 created by virtual register instantiation, register elimination, and
6596 similar optimizations. */
6597 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6600 x = gen_rtx_PLUS (Pmode,
6601 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6602 XEXP (XEXP (x, 1), 0)),
6603 XEXP (XEXP (x, 1), 1));
6607 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6608 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6609 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6610 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6611 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6612 && CONSTANT_P (XEXP (x, 1)))
6615 rtx other = NULL_RTX;
6617 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6619 constant = XEXP (x, 1);
6620 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6622 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6624 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6625 other = XEXP (x, 1);
6633 x = gen_rtx_PLUS (Pmode,
6634 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6635 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6636 plus_constant (other, INTVAL (constant)));
6640 if (changed && legitimate_address_p (mode, x, FALSE))
6643 if (GET_CODE (XEXP (x, 0)) == MULT)
6646 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6649 if (GET_CODE (XEXP (x, 1)) == MULT)
6652 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6656 && GET_CODE (XEXP (x, 1)) == REG
6657 && GET_CODE (XEXP (x, 0)) == REG)
6660 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6663 x = legitimize_pic_address (x, 0);
6666 if (changed && legitimate_address_p (mode, x, FALSE))
6669 if (GET_CODE (XEXP (x, 0)) == REG)
6671 rtx temp = gen_reg_rtx (Pmode);
6672 rtx val = force_operand (XEXP (x, 1), temp);
6674 emit_move_insn (temp, val);
6680 else if (GET_CODE (XEXP (x, 1)) == REG)
6682 rtx temp = gen_reg_rtx (Pmode);
6683 rtx val = force_operand (XEXP (x, 0), temp);
6685 emit_move_insn (temp, val);
6695 /* Print an integer constant expression in assembler syntax. Addition
6696 and subtraction are the only arithmetic that may appear in these
6697 expressions. FILE is the stdio stream to write to, X is the rtx, and
6698 CODE is the operand print code from the output string. */
6701 output_pic_addr_const (FILE *file, rtx x, int code)
6705 switch (GET_CODE (x))
6715 assemble_name (file, XSTR (x, 0));
6716 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6717 fputs ("@PLT", file);
6724 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6725 assemble_name (asm_out_file, buf);
6729 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6733 /* This used to output parentheses around the expression,
6734 but that does not work on the 386 (either ATT or BSD assembler). */
6735 output_pic_addr_const (file, XEXP (x, 0), code);
6739 if (GET_MODE (x) == VOIDmode)
6741 /* We can use %d if the number is <32 bits and positive. */
6742 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6743 fprintf (file, "0x%lx%08lx",
6744 (unsigned long) CONST_DOUBLE_HIGH (x),
6745 (unsigned long) CONST_DOUBLE_LOW (x));
6747 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6750 /* We can't handle floating point constants;
6751 PRINT_OPERAND must handle them. */
6752 output_operand_lossage ("floating constant misused");
6756 /* Some assemblers need integer constants to appear first. */
6757 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6759 output_pic_addr_const (file, XEXP (x, 0), code);
6761 output_pic_addr_const (file, XEXP (x, 1), code);
6763 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6765 output_pic_addr_const (file, XEXP (x, 1), code);
6767 output_pic_addr_const (file, XEXP (x, 0), code);
6775 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6776 output_pic_addr_const (file, XEXP (x, 0), code);
6778 output_pic_addr_const (file, XEXP (x, 1), code);
6780 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6784 if (XVECLEN (x, 0) != 1)
6786 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6787 switch (XINT (x, 1))
6790 fputs ("@GOT", file);
6793 fputs ("@GOTOFF", file);
6795 case UNSPEC_GOTPCREL:
6796 fputs ("@GOTPCREL(%rip)", file);
6798 case UNSPEC_GOTTPOFF:
6799 /* FIXME: This might be @TPOFF in Sun ld too. */
6800 fputs ("@GOTTPOFF", file);
6803 fputs ("@TPOFF", file);
6807 fputs ("@TPOFF", file);
6809 fputs ("@NTPOFF", file);
6812 fputs ("@DTPOFF", file);
6814 case UNSPEC_GOTNTPOFF:
6816 fputs ("@GOTTPOFF(%rip)", file);
6818 fputs ("@GOTNTPOFF", file);
6820 case UNSPEC_INDNTPOFF:
6821 fputs ("@INDNTPOFF", file);
6824 output_operand_lossage ("invalid UNSPEC as operand");
6830 output_operand_lossage ("invalid expression as operand");
6834 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6835 We need to handle our special PIC relocations. */
6838 i386_dwarf_output_addr_const (FILE *file, rtx x)
6841 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6845 fprintf (file, "%s", ASM_LONG);
6848 output_pic_addr_const (file, x, '\0');
6850 output_addr_const (file, x);
6854 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6855 We need to emit DTP-relative relocations. */
6858 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6860 fputs (ASM_LONG, file);
6861 output_addr_const (file, x);
6862 fputs ("@DTPOFF", file);
6868 fputs (", 0", file);
6875 /* In the name of slightly smaller debug output, and to cater to
6876 general assembler losage, recognize PIC+GOTOFF and turn it back
6877 into a direct symbol reference. */
6880 ix86_delegitimize_address (rtx orig_x)
6884 if (GET_CODE (x) == MEM)
6889 if (GET_CODE (x) != CONST
6890 || GET_CODE (XEXP (x, 0)) != UNSPEC
6891 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6892 || GET_CODE (orig_x) != MEM)
6894 return XVECEXP (XEXP (x, 0), 0, 0);
6897 if (GET_CODE (x) != PLUS
6898 || GET_CODE (XEXP (x, 1)) != CONST)
6901 if (GET_CODE (XEXP (x, 0)) == REG
6902 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6903 /* %ebx + GOT/GOTOFF */
6905 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6907 /* %ebx + %reg * scale + GOT/GOTOFF */
6909 if (GET_CODE (XEXP (y, 0)) == REG
6910 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6912 else if (GET_CODE (XEXP (y, 1)) == REG
6913 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6917 if (GET_CODE (y) != REG
6918 && GET_CODE (y) != MULT
6919 && GET_CODE (y) != ASHIFT)
6925 x = XEXP (XEXP (x, 1), 0);
6926 if (GET_CODE (x) == UNSPEC
6927 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6928 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6931 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6932 return XVECEXP (x, 0, 0);
6935 if (GET_CODE (x) == PLUS
6936 && GET_CODE (XEXP (x, 0)) == UNSPEC
6937 && GET_CODE (XEXP (x, 1)) == CONST_INT
6938 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6939 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6940 && GET_CODE (orig_x) != MEM)))
6942 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6944 return gen_rtx_PLUS (Pmode, y, x);
6952 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6957 if (mode == CCFPmode || mode == CCFPUmode)
6959 enum rtx_code second_code, bypass_code;
6960 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6961 if (bypass_code != NIL || second_code != NIL)
6963 code = ix86_fp_compare_code_to_integer (code);
6967 code = reverse_condition (code);
6978 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6983 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6984 Those same assemblers have the same but opposite losage on cmov. */
6987 suffix = fp ? "nbe" : "a";
6990 if (mode == CCNOmode || mode == CCGOCmode)
6992 else if (mode == CCmode || mode == CCGCmode)
7003 if (mode == CCNOmode || mode == CCGOCmode)
7005 else if (mode == CCmode || mode == CCGCmode)
7014 suffix = fp ? "nb" : "ae";
7017 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7027 suffix = fp ? "u" : "p";
7030 suffix = fp ? "nu" : "np";
7035 fputs (suffix, file);
7038 /* Print the name of register X to FILE based on its machine mode and number.
7039 If CODE is 'w', pretend the mode is HImode.
7040 If CODE is 'b', pretend the mode is QImode.
7041 If CODE is 'k', pretend the mode is SImode.
7042 If CODE is 'q', pretend the mode is DImode.
7043 If CODE is 'h', pretend the reg is the `high' byte register.
7044 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7047 print_reg (rtx x, int code, FILE *file)
7049 if (REGNO (x) == ARG_POINTER_REGNUM
7050 || REGNO (x) == FRAME_POINTER_REGNUM
7051 || REGNO (x) == FLAGS_REG
7052 || REGNO (x) == FPSR_REG)
7055 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7058 if (code == 'w' || MMX_REG_P (x))
7060 else if (code == 'b')
7062 else if (code == 'k')
7064 else if (code == 'q')
7066 else if (code == 'y')
7068 else if (code == 'h')
7071 code = GET_MODE_SIZE (GET_MODE (x));
7073 /* Irritatingly, AMD extended registers use different naming convention
7074 from the normal registers. */
7075 if (REX_INT_REG_P (x))
7082 error ("extended registers have no high halves");
7085 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7088 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7091 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7094 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7097 error ("unsupported operand size for extended register");
7105 if (STACK_TOP_P (x))
7107 fputs ("st(0)", file);
7114 if (! ANY_FP_REG_P (x))
7115 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7120 fputs (hi_reg_name[REGNO (x)], file);
7123 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7125 fputs (qi_reg_name[REGNO (x)], file);
7128 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7130 fputs (qi_high_reg_name[REGNO (x)], file);
7137 /* Locate some local-dynamic symbol still in use by this function
7138 so that we can print its name in some tls_local_dynamic_base
7142 get_some_local_dynamic_name (void)
7146 if (cfun->machine->some_ld_name)
7147 return cfun->machine->some_ld_name;
7149 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7151 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7152 return cfun->machine->some_ld_name;
7158 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7162 if (GET_CODE (x) == SYMBOL_REF
7163 && local_dynamic_symbolic_operand (x, Pmode))
7165 cfun->machine->some_ld_name = XSTR (x, 0);
7173 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7174 C -- print opcode suffix for set/cmov insn.
7175 c -- like C, but print reversed condition
7176 F,f -- likewise, but for floating-point.
7177 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7179 R -- print the prefix for register names.
7180 z -- print the opcode suffix for the size of the current operand.
7181 * -- print a star (in certain assembler syntax)
7182 A -- print an absolute memory reference.
7183 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7184 s -- print a shift double count, followed by the assemblers argument
7186 b -- print the QImode name of the register for the indicated operand.
7187 %b0 would print %al if operands[0] is reg 0.
7188 w -- likewise, print the HImode name of the register.
7189 k -- likewise, print the SImode name of the register.
7190 q -- likewise, print the DImode name of the register.
7191 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7192 y -- print "st(0)" instead of "st" as a register.
7193 D -- print condition for SSE cmp instruction.
7194 P -- if PIC, print an @PLT suffix.
7195 X -- don't print any sort of PIC '@' suffix for a symbol.
7196 & -- print some in-use local-dynamic symbol name.
7200 print_operand (FILE *file, rtx x, int code)
7207 if (ASSEMBLER_DIALECT == ASM_ATT)
7212 assemble_name (file, get_some_local_dynamic_name ());
7216 if (ASSEMBLER_DIALECT == ASM_ATT)
7218 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7220 /* Intel syntax. For absolute addresses, registers should not
7221 be surrounded by braces. */
7222 if (GET_CODE (x) != REG)
7225 PRINT_OPERAND (file, x, 0);
7233 PRINT_OPERAND (file, x, 0);
7238 if (ASSEMBLER_DIALECT == ASM_ATT)
7243 if (ASSEMBLER_DIALECT == ASM_ATT)
7248 if (ASSEMBLER_DIALECT == ASM_ATT)
7253 if (ASSEMBLER_DIALECT == ASM_ATT)
7258 if (ASSEMBLER_DIALECT == ASM_ATT)
7263 if (ASSEMBLER_DIALECT == ASM_ATT)
7268 /* 387 opcodes don't get size suffixes if the operands are
7270 if (STACK_REG_P (x))
7273 /* Likewise if using Intel opcodes. */
7274 if (ASSEMBLER_DIALECT == ASM_INTEL)
7277 /* This is the size of op from size of operand. */
7278 switch (GET_MODE_SIZE (GET_MODE (x)))
7281 #ifdef HAVE_GAS_FILDS_FISTS
7287 if (GET_MODE (x) == SFmode)
7302 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7304 #ifdef GAS_MNEMONICS
7330 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7332 PRINT_OPERAND (file, x, 0);
7338 /* Little bit of braindamage here. The SSE compare instructions
7339 does use completely different names for the comparisons that the
7340 fp conditional moves. */
7341 switch (GET_CODE (x))
7356 fputs ("unord", file);
7360 fputs ("neq", file);
7364 fputs ("nlt", file);
7368 fputs ("nle", file);
7371 fputs ("ord", file);
7379 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7380 if (ASSEMBLER_DIALECT == ASM_ATT)
7382 switch (GET_MODE (x))
7384 case HImode: putc ('w', file); break;
7386 case SFmode: putc ('l', file); break;
7388 case DFmode: putc ('q', file); break;
7396 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7399 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7400 if (ASSEMBLER_DIALECT == ASM_ATT)
7403 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7406 /* Like above, but reverse condition */
7408 /* Check to see if argument to %c is really a constant
7409 and not a condition code which needs to be reversed. */
7410 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7412 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7415 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7418 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7419 if (ASSEMBLER_DIALECT == ASM_ATT)
7422 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7428 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7431 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7434 int pred_val = INTVAL (XEXP (x, 0));
7436 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7437 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7439 int taken = pred_val > REG_BR_PROB_BASE / 2;
7440 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7442 /* Emit hints only in the case default branch prediction
7443 heuristics would fail. */
7444 if (taken != cputaken)
7446 /* We use 3e (DS) prefix for taken branches and
7447 2e (CS) prefix for not taken branches. */
7449 fputs ("ds ; ", file);
7451 fputs ("cs ; ", file);
7458 output_operand_lossage ("invalid operand code `%c'", code);
7462 if (GET_CODE (x) == REG)
7463 print_reg (x, code, file);
7465 else if (GET_CODE (x) == MEM)
7467 /* No `byte ptr' prefix for call instructions. */
7468 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7471 switch (GET_MODE_SIZE (GET_MODE (x)))
7473 case 1: size = "BYTE"; break;
7474 case 2: size = "WORD"; break;
7475 case 4: size = "DWORD"; break;
7476 case 8: size = "QWORD"; break;
7477 case 12: size = "XWORD"; break;
7478 case 16: size = "XMMWORD"; break;
7483 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7486 else if (code == 'w')
7488 else if (code == 'k')
7492 fputs (" PTR ", file);
7496 /* Avoid (%rip) for call operands. */
7497 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7498 && GET_CODE (x) != CONST_INT)
7499 output_addr_const (file, x);
7500 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7501 output_operand_lossage ("invalid constraints for operand");
7506 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7511 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7512 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7514 if (ASSEMBLER_DIALECT == ASM_ATT)
7516 fprintf (file, "0x%08lx", l);
7519 /* These float cases don't actually occur as immediate operands. */
7520 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7524 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7525 fprintf (file, "%s", dstr);
7528 else if (GET_CODE (x) == CONST_DOUBLE
7529 && GET_MODE (x) == XFmode)
7533 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7534 fprintf (file, "%s", dstr);
7541 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7543 if (ASSEMBLER_DIALECT == ASM_ATT)
7546 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7547 || GET_CODE (x) == LABEL_REF)
7549 if (ASSEMBLER_DIALECT == ASM_ATT)
7552 fputs ("OFFSET FLAT:", file);
7555 if (GET_CODE (x) == CONST_INT)
7556 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7558 output_pic_addr_const (file, x, code);
7560 output_addr_const (file, x);
7564 /* Print a memory operand whose address is ADDR. */
7567 print_operand_address (FILE *file, rtx addr)
7569 struct ix86_address parts;
7570 rtx base, index, disp;
7573 if (! ix86_decompose_address (addr, &parts))
7577 index = parts.index;
7579 scale = parts.scale;
7587 if (USER_LABEL_PREFIX[0] == 0)
7589 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7595 if (!base && !index)
7597 /* Displacement only requires special attention. */
7599 if (GET_CODE (disp) == CONST_INT)
7601 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7603 if (USER_LABEL_PREFIX[0] == 0)
7605 fputs ("ds:", file);
7607 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7610 output_pic_addr_const (file, disp, 0);
7612 output_addr_const (file, disp);
7614 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7616 && ((GET_CODE (disp) == SYMBOL_REF
7617 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7618 || GET_CODE (disp) == LABEL_REF
7619 || (GET_CODE (disp) == CONST
7620 && GET_CODE (XEXP (disp, 0)) == PLUS
7621 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7622 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7623 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7624 fputs ("(%rip)", file);
7628 if (ASSEMBLER_DIALECT == ASM_ATT)
7633 output_pic_addr_const (file, disp, 0);
7634 else if (GET_CODE (disp) == LABEL_REF)
7635 output_asm_label (disp);
7637 output_addr_const (file, disp);
7642 print_reg (base, 0, file);
7646 print_reg (index, 0, file);
7648 fprintf (file, ",%d", scale);
7654 rtx offset = NULL_RTX;
7658 /* Pull out the offset of a symbol; print any symbol itself. */
7659 if (GET_CODE (disp) == CONST
7660 && GET_CODE (XEXP (disp, 0)) == PLUS
7661 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7663 offset = XEXP (XEXP (disp, 0), 1);
7664 disp = gen_rtx_CONST (VOIDmode,
7665 XEXP (XEXP (disp, 0), 0));
7669 output_pic_addr_const (file, disp, 0);
7670 else if (GET_CODE (disp) == LABEL_REF)
7671 output_asm_label (disp);
7672 else if (GET_CODE (disp) == CONST_INT)
7675 output_addr_const (file, disp);
7681 print_reg (base, 0, file);
7684 if (INTVAL (offset) >= 0)
7686 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7690 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7697 print_reg (index, 0, file);
7699 fprintf (file, "*%d", scale);
7707 output_addr_const_extra (FILE *file, rtx x)
7711 if (GET_CODE (x) != UNSPEC)
7714 op = XVECEXP (x, 0, 0);
7715 switch (XINT (x, 1))
7717 case UNSPEC_GOTTPOFF:
7718 output_addr_const (file, op);
7719 /* FIXME: This might be @TPOFF in Sun ld. */
7720 fputs ("@GOTTPOFF", file);
7723 output_addr_const (file, op);
7724 fputs ("@TPOFF", file);
7727 output_addr_const (file, op);
7729 fputs ("@TPOFF", file);
7731 fputs ("@NTPOFF", file);
7734 output_addr_const (file, op);
7735 fputs ("@DTPOFF", file);
7737 case UNSPEC_GOTNTPOFF:
7738 output_addr_const (file, op);
7740 fputs ("@GOTTPOFF(%rip)", file);
7742 fputs ("@GOTNTPOFF", file);
7744 case UNSPEC_INDNTPOFF:
7745 output_addr_const (file, op);
7746 fputs ("@INDNTPOFF", file);
7756 /* Split one or more DImode RTL references into pairs of SImode
7757 references. The RTL can be REG, offsettable MEM, integer constant, or
7758 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7759 split and "num" is its length. lo_half and hi_half are output arrays
7760 that parallel "operands". */
7763 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7767 rtx op = operands[num];
7769 /* simplify_subreg refuse to split volatile memory addresses,
7770 but we still have to handle it. */
7771 if (GET_CODE (op) == MEM)
7773 lo_half[num] = adjust_address (op, SImode, 0);
7774 hi_half[num] = adjust_address (op, SImode, 4);
7778 lo_half[num] = simplify_gen_subreg (SImode, op,
7779 GET_MODE (op) == VOIDmode
7780 ? DImode : GET_MODE (op), 0);
7781 hi_half[num] = simplify_gen_subreg (SImode, op,
7782 GET_MODE (op) == VOIDmode
7783 ? DImode : GET_MODE (op), 4);
7787 /* Split one or more TImode RTL references into pairs of SImode
7788 references. The RTL can be REG, offsettable MEM, integer constant, or
7789 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7790 split and "num" is its length. lo_half and hi_half are output arrays
7791 that parallel "operands". */
7794 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7798 rtx op = operands[num];
7800 /* simplify_subreg refuse to split volatile memory addresses, but we
7801 still have to handle it. */
7802 if (GET_CODE (op) == MEM)
7804 lo_half[num] = adjust_address (op, DImode, 0);
7805 hi_half[num] = adjust_address (op, DImode, 8);
7809 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7810 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7815 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7816 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7817 is the expression of the binary operation. The output may either be
7818 emitted here, or returned to the caller, like all output_* functions.
7820 There is no guarantee that the operands are the same mode, as they
7821 might be within FLOAT or FLOAT_EXTEND expressions. */
7823 #ifndef SYSV386_COMPAT
7824 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7825 wants to fix the assemblers because that causes incompatibility
7826 with gcc. No-one wants to fix gcc because that causes
7827 incompatibility with assemblers... You can use the option of
7828 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7829 #define SYSV386_COMPAT 1
7833 output_387_binary_op (rtx insn, rtx *operands)
7835 static char buf[30];
7838 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7840 #ifdef ENABLE_CHECKING
7841 /* Even if we do not want to check the inputs, this documents input
7842 constraints. Which helps in understanding the following code. */
7843 if (STACK_REG_P (operands[0])
7844 && ((REG_P (operands[1])
7845 && REGNO (operands[0]) == REGNO (operands[1])
7846 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7847 || (REG_P (operands[2])
7848 && REGNO (operands[0]) == REGNO (operands[2])
7849 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7850 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7856 switch (GET_CODE (operands[3]))
7859 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7860 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7868 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7869 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7877 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7878 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7886 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7887 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7901 if (GET_MODE (operands[0]) == SFmode)
7902 strcat (buf, "ss\t{%2, %0|%0, %2}");
7904 strcat (buf, "sd\t{%2, %0|%0, %2}");
7909 switch (GET_CODE (operands[3]))
7913 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7915 rtx temp = operands[2];
7916 operands[2] = operands[1];
7920 /* know operands[0] == operands[1]. */
7922 if (GET_CODE (operands[2]) == MEM)
7928 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7930 if (STACK_TOP_P (operands[0]))
7931 /* How is it that we are storing to a dead operand[2]?
7932 Well, presumably operands[1] is dead too. We can't
7933 store the result to st(0) as st(0) gets popped on this
7934 instruction. Instead store to operands[2] (which I
7935 think has to be st(1)). st(1) will be popped later.
7936 gcc <= 2.8.1 didn't have this check and generated
7937 assembly code that the Unixware assembler rejected. */
7938 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7940 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7944 if (STACK_TOP_P (operands[0]))
7945 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7947 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7952 if (GET_CODE (operands[1]) == MEM)
7958 if (GET_CODE (operands[2]) == MEM)
7964 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7967 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7968 derived assemblers, confusingly reverse the direction of
7969 the operation for fsub{r} and fdiv{r} when the
7970 destination register is not st(0). The Intel assembler
7971 doesn't have this brain damage. Read !SYSV386_COMPAT to
7972 figure out what the hardware really does. */
7973 if (STACK_TOP_P (operands[0]))
7974 p = "{p\t%0, %2|rp\t%2, %0}";
7976 p = "{rp\t%2, %0|p\t%0, %2}";
7978 if (STACK_TOP_P (operands[0]))
7979 /* As above for fmul/fadd, we can't store to st(0). */
7980 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7982 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7987 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7990 if (STACK_TOP_P (operands[0]))
7991 p = "{rp\t%0, %1|p\t%1, %0}";
7993 p = "{p\t%1, %0|rp\t%0, %1}";
7995 if (STACK_TOP_P (operands[0]))
7996 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7998 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8003 if (STACK_TOP_P (operands[0]))
8005 if (STACK_TOP_P (operands[1]))
8006 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8008 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8011 else if (STACK_TOP_P (operands[1]))
8014 p = "{\t%1, %0|r\t%0, %1}";
8016 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8022 p = "{r\t%2, %0|\t%0, %2}";
8024 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8037 /* Output code to initialize control word copies used by
8038 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8039 is set to control word rounding downwards. */
8041 emit_i387_cw_initialization (rtx normal, rtx round_down)
8043 rtx reg = gen_reg_rtx (HImode);
8045 emit_insn (gen_x86_fnstcw_1 (normal));
8046 emit_move_insn (reg, normal);
8047 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8049 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8051 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8052 emit_move_insn (round_down, reg);
8055 /* Output code for INSN to convert a float to a signed int. OPERANDS
8056 are the insn operands. The output may be [HSD]Imode and the input
8057 operand may be [SDX]Fmode. */
8060 output_fix_trunc (rtx insn, rtx *operands)
8062 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8063 int dimode_p = GET_MODE (operands[0]) == DImode;
8065 /* Jump through a hoop or two for DImode, since the hardware has no
8066 non-popping instruction. We used to do this a different way, but
8067 that was somewhat fragile and broke with post-reload splitters. */
8068 if (dimode_p && !stack_top_dies)
8069 output_asm_insn ("fld\t%y1", operands);
8071 if (!STACK_TOP_P (operands[1]))
8074 if (GET_CODE (operands[0]) != MEM)
8077 output_asm_insn ("fldcw\t%3", operands);
8078 if (stack_top_dies || dimode_p)
8079 output_asm_insn ("fistp%z0\t%0", operands);
8081 output_asm_insn ("fist%z0\t%0", operands);
8082 output_asm_insn ("fldcw\t%2", operands);
8087 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8088 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8089 when fucom should be used. */
8092 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8095 rtx cmp_op0 = operands[0];
8096 rtx cmp_op1 = operands[1];
8097 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8102 cmp_op1 = operands[2];
8106 if (GET_MODE (operands[0]) == SFmode)
8108 return "ucomiss\t{%1, %0|%0, %1}";
8110 return "comiss\t{%1, %0|%0, %1}";
8113 return "ucomisd\t{%1, %0|%0, %1}";
8115 return "comisd\t{%1, %0|%0, %1}";
8118 if (! STACK_TOP_P (cmp_op0))
8121 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8123 if (STACK_REG_P (cmp_op1)
8125 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8126 && REGNO (cmp_op1) != FIRST_STACK_REG)
8128 /* If both the top of the 387 stack dies, and the other operand
8129 is also a stack register that dies, then this must be a
8130 `fcompp' float compare */
8134 /* There is no double popping fcomi variant. Fortunately,
8135 eflags is immune from the fstp's cc clobbering. */
8137 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8139 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8147 return "fucompp\n\tfnstsw\t%0";
8149 return "fcompp\n\tfnstsw\t%0";
8162 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8164 static const char * const alt[24] =
8176 "fcomi\t{%y1, %0|%0, %y1}",
8177 "fcomip\t{%y1, %0|%0, %y1}",
8178 "fucomi\t{%y1, %0|%0, %y1}",
8179 "fucomip\t{%y1, %0|%0, %y1}",
8186 "fcom%z2\t%y2\n\tfnstsw\t%0",
8187 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8188 "fucom%z2\t%y2\n\tfnstsw\t%0",
8189 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8191 "ficom%z2\t%y2\n\tfnstsw\t%0",
8192 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8200 mask = eflags_p << 3;
8201 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8202 mask |= unordered_p << 1;
8203 mask |= stack_top_dies;
8216 ix86_output_addr_vec_elt (FILE *file, int value)
8218 const char *directive = ASM_LONG;
8223 directive = ASM_QUAD;
8229 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8233 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8236 fprintf (file, "%s%s%d-%s%d\n",
8237 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8238 else if (HAVE_AS_GOTOFF_IN_DATA)
8239 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8241 else if (TARGET_MACHO)
8243 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8244 machopic_output_function_base_name (file);
8245 fprintf(file, "\n");
8249 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8250 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8253 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8257 ix86_expand_clear (rtx dest)
8261 /* We play register width games, which are only valid after reload. */
8262 if (!reload_completed)
8265 /* Avoid HImode and its attendant prefix byte. */
8266 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8267 dest = gen_rtx_REG (SImode, REGNO (dest));
8269 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8271 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8272 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8274 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8275 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8281 /* X is an unchanging MEM. If it is a constant pool reference, return
8282 the constant pool rtx, else NULL. */
8285 maybe_get_pool_constant (rtx x)
8287 x = ix86_delegitimize_address (XEXP (x, 0));
8289 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8290 return get_pool_constant (x);
8296 ix86_expand_move (enum machine_mode mode, rtx operands[])
8298 int strict = (reload_in_progress || reload_completed);
8300 enum tls_model model;
8305 model = tls_symbolic_operand (op1, Pmode);
8308 op1 = legitimize_tls_address (op1, model, true);
8309 op1 = force_operand (op1, op0);
8314 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8319 rtx temp = ((reload_in_progress
8320 || ((op0 && GET_CODE (op0) == REG)
8322 ? op0 : gen_reg_rtx (Pmode));
8323 op1 = machopic_indirect_data_reference (op1, temp);
8324 op1 = machopic_legitimize_pic_address (op1, mode,
8325 temp == op1 ? 0 : temp);
8327 else if (MACHOPIC_INDIRECT)
8328 op1 = machopic_indirect_data_reference (op1, 0);
8332 if (GET_CODE (op0) == MEM)
8333 op1 = force_reg (Pmode, op1);
8337 if (GET_CODE (temp) != REG)
8338 temp = gen_reg_rtx (Pmode);
8339 temp = legitimize_pic_address (op1, temp);
8344 #endif /* TARGET_MACHO */
8348 if (GET_CODE (op0) == MEM
8349 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8350 || !push_operand (op0, mode))
8351 && GET_CODE (op1) == MEM)
8352 op1 = force_reg (mode, op1);
8354 if (push_operand (op0, mode)
8355 && ! general_no_elim_operand (op1, mode))
8356 op1 = copy_to_mode_reg (mode, op1);
8358 /* Force large constants in 64bit compilation into register
8359 to get them CSEed. */
8360 if (TARGET_64BIT && mode == DImode
8361 && immediate_operand (op1, mode)
8362 && !x86_64_zero_extended_value (op1)
8363 && !register_operand (op0, mode)
8364 && optimize && !reload_completed && !reload_in_progress)
8365 op1 = copy_to_mode_reg (mode, op1);
8367 if (FLOAT_MODE_P (mode))
8369 /* If we are loading a floating point constant to a register,
8370 force the value to memory now, since we'll get better code
8371 out the back end. */
8375 else if (GET_CODE (op1) == CONST_DOUBLE)
8377 op1 = validize_mem (force_const_mem (mode, op1));
8378 if (!register_operand (op0, mode))
8380 rtx temp = gen_reg_rtx (mode);
8381 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8382 emit_move_insn (op0, temp);
8389 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8393 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8395 /* Force constants other than zero into memory. We do not know how
8396 the instructions used to build constants modify the upper 64 bits
8397 of the register, once we have that information we may be able
8398 to handle some of them more efficiently. */
8399 if ((reload_in_progress | reload_completed) == 0
8400 && register_operand (operands[0], mode)
8401 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8402 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8404 /* Make operand1 a register if it isn't already. */
8406 && !register_operand (operands[0], mode)
8407 && !register_operand (operands[1], mode))
8409 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8410 emit_move_insn (operands[0], temp);
8414 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8417 /* Attempt to expand a binary operator. Make the expansion closer to the
8418 actual machine, then just general_operand, which will allow 3 separate
8419 memory references (one output, two input) in a single insn. */
8422 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8425 int matching_memory;
8426 rtx src1, src2, dst, op, clob;
8432 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8433 if (GET_RTX_CLASS (code) == 'c'
8434 && (rtx_equal_p (dst, src2)
8435 || immediate_operand (src1, mode)))
8442 /* If the destination is memory, and we do not have matching source
8443 operands, do things in registers. */
8444 matching_memory = 0;
8445 if (GET_CODE (dst) == MEM)
8447 if (rtx_equal_p (dst, src1))
8448 matching_memory = 1;
8449 else if (GET_RTX_CLASS (code) == 'c'
8450 && rtx_equal_p (dst, src2))
8451 matching_memory = 2;
8453 dst = gen_reg_rtx (mode);
8456 /* Both source operands cannot be in memory. */
8457 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8459 if (matching_memory != 2)
8460 src2 = force_reg (mode, src2);
8462 src1 = force_reg (mode, src1);
8465 /* If the operation is not commutable, source 1 cannot be a constant
8466 or non-matching memory. */
8467 if ((CONSTANT_P (src1)
8468 || (!matching_memory && GET_CODE (src1) == MEM))
8469 && GET_RTX_CLASS (code) != 'c')
8470 src1 = force_reg (mode, src1);
8472 /* If optimizing, copy to regs to improve CSE */
8473 if (optimize && ! no_new_pseudos)
8475 if (GET_CODE (dst) == MEM)
8476 dst = gen_reg_rtx (mode);
8477 if (GET_CODE (src1) == MEM)
8478 src1 = force_reg (mode, src1);
8479 if (GET_CODE (src2) == MEM)
8480 src2 = force_reg (mode, src2);
8483 /* Emit the instruction. */
8485 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8486 if (reload_in_progress)
8488 /* Reload doesn't know about the flags register, and doesn't know that
8489 it doesn't want to clobber it. We can only do this with PLUS. */
8496 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8497 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8500 /* Fix up the destination if needed. */
8501 if (dst != operands[0])
8502 emit_move_insn (operands[0], dst);
8505 /* Return TRUE or FALSE depending on whether the binary operator meets the
8506 appropriate constraints. */
8509 ix86_binary_operator_ok (enum rtx_code code,
8510 enum machine_mode mode ATTRIBUTE_UNUSED,
8513 /* Both source operands cannot be in memory. */
8514 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8516 /* If the operation is not commutable, source 1 cannot be a constant. */
8517 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8519 /* If the destination is memory, we must have a matching source operand. */
8520 if (GET_CODE (operands[0]) == MEM
8521 && ! (rtx_equal_p (operands[0], operands[1])
8522 || (GET_RTX_CLASS (code) == 'c'
8523 && rtx_equal_p (operands[0], operands[2]))))
8525 /* If the operation is not commutable and the source 1 is memory, we must
8526 have a matching destination. */
8527 if (GET_CODE (operands[1]) == MEM
8528 && GET_RTX_CLASS (code) != 'c'
8529 && ! rtx_equal_p (operands[0], operands[1]))
8534 /* Attempt to expand a unary operator. Make the expansion closer to the
8535 actual machine, then just general_operand, which will allow 2 separate
8536 memory references (one output, one input) in a single insn. */
8539 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8542 int matching_memory;
8543 rtx src, dst, op, clob;
8548 /* If the destination is memory, and we do not have matching source
8549 operands, do things in registers. */
8550 matching_memory = 0;
8551 if (GET_CODE (dst) == MEM)
8553 if (rtx_equal_p (dst, src))
8554 matching_memory = 1;
8556 dst = gen_reg_rtx (mode);
8559 /* When source operand is memory, destination must match. */
8560 if (!matching_memory && GET_CODE (src) == MEM)
8561 src = force_reg (mode, src);
8563 /* If optimizing, copy to regs to improve CSE */
8564 if (optimize && ! no_new_pseudos)
8566 if (GET_CODE (dst) == MEM)
8567 dst = gen_reg_rtx (mode);
8568 if (GET_CODE (src) == MEM)
8569 src = force_reg (mode, src);
8572 /* Emit the instruction. */
8574 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8575 if (reload_in_progress || code == NOT)
8577 /* Reload doesn't know about the flags register, and doesn't know that
8578 it doesn't want to clobber it. */
8585 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8586 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8589 /* Fix up the destination if needed. */
8590 if (dst != operands[0])
8591 emit_move_insn (operands[0], dst);
8594 /* Return TRUE or FALSE depending on whether the unary operator meets the
8595 appropriate constraints. */
8598 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8599 enum machine_mode mode ATTRIBUTE_UNUSED,
8600 rtx operands[2] ATTRIBUTE_UNUSED)
8602 /* If one of operands is memory, source and destination must match. */
8603 if ((GET_CODE (operands[0]) == MEM
8604 || GET_CODE (operands[1]) == MEM)
8605 && ! rtx_equal_p (operands[0], operands[1]))
8610 /* Return TRUE or FALSE depending on whether the first SET in INSN
8611 has source and destination with matching CC modes, and that the
8612 CC mode is at least as constrained as REQ_MODE. */
8615 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8618 enum machine_mode set_mode;
8620 set = PATTERN (insn);
8621 if (GET_CODE (set) == PARALLEL)
8622 set = XVECEXP (set, 0, 0);
8623 if (GET_CODE (set) != SET)
8625 if (GET_CODE (SET_SRC (set)) != COMPARE)
8628 set_mode = GET_MODE (SET_DEST (set));
8632 if (req_mode != CCNOmode
8633 && (req_mode != CCmode
8634 || XEXP (SET_SRC (set), 1) != const0_rtx))
8638 if (req_mode == CCGCmode)
8642 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8646 if (req_mode == CCZmode)
8656 return (GET_MODE (SET_SRC (set)) == set_mode);
8659 /* Generate insn patterns to do an integer compare of OPERANDS. */
8662 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8664 enum machine_mode cmpmode;
8667 cmpmode = SELECT_CC_MODE (code, op0, op1);
8668 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8670 /* This is very simple, but making the interface the same as in the
8671 FP case makes the rest of the code easier. */
8672 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8673 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8675 /* Return the test that should be put into the flags user, i.e.
8676 the bcc, scc, or cmov instruction. */
8677 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8680 /* Figure out whether to use ordered or unordered fp comparisons.
8681 Return the appropriate mode to use. */
8684 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8686 /* ??? In order to make all comparisons reversible, we do all comparisons
8687 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8688 all forms trapping and nontrapping comparisons, we can make inequality
8689 comparisons trapping again, since it results in better code when using
8690 FCOM based compares. */
8691 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8695 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8697 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8698 return ix86_fp_compare_mode (code);
8701 /* Only zero flag is needed. */
8703 case NE: /* ZF!=0 */
8705 /* Codes needing carry flag. */
8706 case GEU: /* CF=0 */
8707 case GTU: /* CF=0 & ZF=0 */
8708 case LTU: /* CF=1 */
8709 case LEU: /* CF=1 | ZF=1 */
8711 /* Codes possibly doable only with sign flag when
8712 comparing against zero. */
8713 case GE: /* SF=OF or SF=0 */
8714 case LT: /* SF<>OF or SF=1 */
8715 if (op1 == const0_rtx)
8718 /* For other cases Carry flag is not required. */
8720 /* Codes doable only with sign flag when comparing
8721 against zero, but we miss jump instruction for it
8722 so we need to use relational tests against overflow
8723 that thus needs to be zero. */
8724 case GT: /* ZF=0 & SF=OF */
8725 case LE: /* ZF=1 | SF<>OF */
8726 if (op1 == const0_rtx)
8730 /* strcmp pattern do (use flags) and combine may ask us for proper
8739 /* Return the fixed registers used for condition codes. */
8742 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8749 /* If two condition code modes are compatible, return a condition code
8750 mode which is compatible with both. Otherwise, return
8753 static enum machine_mode
8754 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8759 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8762 if ((m1 == CCGCmode && m2 == CCGOCmode)
8763 || (m1 == CCGOCmode && m2 == CCGCmode))
8791 /* These are only compatible with themselves, which we already
8797 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8800 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8802 enum rtx_code swapped_code = swap_condition (code);
8803 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8804 || (ix86_fp_comparison_cost (swapped_code)
8805 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8808 /* Swap, force into registers, or otherwise massage the two operands
8809 to a fp comparison. The operands are updated in place; the new
8810 comparison code is returned. */
8812 static enum rtx_code
8813 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8815 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8816 rtx op0 = *pop0, op1 = *pop1;
8817 enum machine_mode op_mode = GET_MODE (op0);
8818 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8820 /* All of the unordered compare instructions only work on registers.
8821 The same is true of the XFmode compare instructions. The same is
8822 true of the fcomi compare instructions. */
8825 && (fpcmp_mode == CCFPUmode
8826 || op_mode == XFmode
8827 || ix86_use_fcomi_compare (code)))
8829 op0 = force_reg (op_mode, op0);
8830 op1 = force_reg (op_mode, op1);
8834 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8835 things around if they appear profitable, otherwise force op0
8838 if (standard_80387_constant_p (op0) == 0
8839 || (GET_CODE (op0) == MEM
8840 && ! (standard_80387_constant_p (op1) == 0
8841 || GET_CODE (op1) == MEM)))
8844 tmp = op0, op0 = op1, op1 = tmp;
8845 code = swap_condition (code);
8848 if (GET_CODE (op0) != REG)
8849 op0 = force_reg (op_mode, op0);
8851 if (CONSTANT_P (op1))
8853 if (standard_80387_constant_p (op1))
8854 op1 = force_reg (op_mode, op1);
8856 op1 = validize_mem (force_const_mem (op_mode, op1));
8860 /* Try to rearrange the comparison to make it cheaper. */
8861 if (ix86_fp_comparison_cost (code)
8862 > ix86_fp_comparison_cost (swap_condition (code))
8863 && (GET_CODE (op1) == REG || !no_new_pseudos))
8866 tmp = op0, op0 = op1, op1 = tmp;
8867 code = swap_condition (code);
8868 if (GET_CODE (op0) != REG)
8869 op0 = force_reg (op_mode, op0);
8877 /* Convert comparison codes we use to represent FP comparison to integer
8878 code that will result in proper branch. Return UNKNOWN if no such code
8880 static enum rtx_code
8881 ix86_fp_compare_code_to_integer (enum rtx_code code)
8910 /* Split comparison code CODE into comparisons we can do using branch
8911 instructions. BYPASS_CODE is comparison code for branch that will
8912 branch around FIRST_CODE and SECOND_CODE. If some of branches
8913 is not required, set value to NIL.
8914 We never require more than two branches. */
8916 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8917 enum rtx_code *first_code,
8918 enum rtx_code *second_code)
8924 /* The fcomi comparison sets flags as follows:
8934 case GT: /* GTU - CF=0 & ZF=0 */
8935 case GE: /* GEU - CF=0 */
8936 case ORDERED: /* PF=0 */
8937 case UNORDERED: /* PF=1 */
8938 case UNEQ: /* EQ - ZF=1 */
8939 case UNLT: /* LTU - CF=1 */
8940 case UNLE: /* LEU - CF=1 | ZF=1 */
8941 case LTGT: /* EQ - ZF=0 */
8943 case LT: /* LTU - CF=1 - fails on unordered */
8945 *bypass_code = UNORDERED;
8947 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8949 *bypass_code = UNORDERED;
8951 case EQ: /* EQ - ZF=1 - fails on unordered */
8953 *bypass_code = UNORDERED;
8955 case NE: /* NE - ZF=0 - fails on unordered */
8957 *second_code = UNORDERED;
8959 case UNGE: /* GEU - CF=0 - fails on unordered */
8961 *second_code = UNORDERED;
8963 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8965 *second_code = UNORDERED;
8970 if (!TARGET_IEEE_FP)
8977 /* Return cost of comparison done fcom + arithmetics operations on AX.
8978 All following functions do use number of instructions as a cost metrics.
8979 In future this should be tweaked to compute bytes for optimize_size and
8980 take into account performance of various instructions on various CPUs. */
8982 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8984 if (!TARGET_IEEE_FP)
8986 /* The cost of code output by ix86_expand_fp_compare. */
9014 /* Return cost of comparison done using fcomi operation.
9015 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9017 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9019 enum rtx_code bypass_code, first_code, second_code;
9020 /* Return arbitrarily high cost when instruction is not supported - this
9021 prevents gcc from using it. */
9024 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9025 return (bypass_code != NIL || second_code != NIL) + 2;
9028 /* Return cost of comparison done using sahf operation.
9029 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9031 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9033 enum rtx_code bypass_code, first_code, second_code;
9034 /* Return arbitrarily high cost when instruction is not preferred - this
9035 avoids gcc from using it. */
9036 if (!TARGET_USE_SAHF && !optimize_size)
9038 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9039 return (bypass_code != NIL || second_code != NIL) + 3;
9042 /* Compute cost of the comparison done using any method.
9043 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9045 ix86_fp_comparison_cost (enum rtx_code code)
9047 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9050 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9051 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9053 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9054 if (min > sahf_cost)
9056 if (min > fcomi_cost)
9061 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9064 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9065 rtx *second_test, rtx *bypass_test)
9067 enum machine_mode fpcmp_mode, intcmp_mode;
9069 int cost = ix86_fp_comparison_cost (code);
9070 enum rtx_code bypass_code, first_code, second_code;
9072 fpcmp_mode = ix86_fp_compare_mode (code);
9073 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9076 *second_test = NULL_RTX;
9078 *bypass_test = NULL_RTX;
9080 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9082 /* Do fcomi/sahf based test when profitable. */
9083 if ((bypass_code == NIL || bypass_test)
9084 && (second_code == NIL || second_test)
9085 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9089 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9090 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9096 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9097 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9099 scratch = gen_reg_rtx (HImode);
9100 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9101 emit_insn (gen_x86_sahf_1 (scratch));
9104 /* The FP codes work out to act like unsigned. */
9105 intcmp_mode = fpcmp_mode;
9107 if (bypass_code != NIL)
9108 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9109 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9111 if (second_code != NIL)
9112 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9113 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9118 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9119 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9120 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9122 scratch = gen_reg_rtx (HImode);
9123 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9125 /* In the unordered case, we have to check C2 for NaN's, which
9126 doesn't happen to work out to anything nice combination-wise.
9127 So do some bit twiddling on the value we've got in AH to come
9128 up with an appropriate set of condition codes. */
9130 intcmp_mode = CCNOmode;
9135 if (code == GT || !TARGET_IEEE_FP)
9137 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9142 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9143 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9144 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9145 intcmp_mode = CCmode;
9151 if (code == LT && TARGET_IEEE_FP)
9153 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9154 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9155 intcmp_mode = CCmode;
9160 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9166 if (code == GE || !TARGET_IEEE_FP)
9168 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9173 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9174 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9181 if (code == LE && TARGET_IEEE_FP)
9183 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9184 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9185 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9186 intcmp_mode = CCmode;
9191 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9197 if (code == EQ && TARGET_IEEE_FP)
9199 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9200 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9201 intcmp_mode = CCmode;
9206 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9213 if (code == NE && TARGET_IEEE_FP)
9215 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9216 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9222 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9228 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9232 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9241 /* Return the test that should be put into the flags user, i.e.
9242 the bcc, scc, or cmov instruction. */
9243 return gen_rtx_fmt_ee (code, VOIDmode,
9244 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9249 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9252 op0 = ix86_compare_op0;
9253 op1 = ix86_compare_op1;
9256 *second_test = NULL_RTX;
9258 *bypass_test = NULL_RTX;
9260 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9261 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9262 second_test, bypass_test);
9264 ret = ix86_expand_int_compare (code, op0, op1);
9269 /* Return true if the CODE will result in nontrivial jump sequence. */
9271 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9273 enum rtx_code bypass_code, first_code, second_code;
9276 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9277 return bypass_code != NIL || second_code != NIL;
9281 ix86_expand_branch (enum rtx_code code, rtx label)
9285 switch (GET_MODE (ix86_compare_op0))
9291 tmp = ix86_expand_compare (code, NULL, NULL);
9292 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9293 gen_rtx_LABEL_REF (VOIDmode, label),
9295 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9304 enum rtx_code bypass_code, first_code, second_code;
9306 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9309 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9311 /* Check whether we will use the natural sequence with one jump. If
9312 so, we can expand jump early. Otherwise delay expansion by
9313 creating compound insn to not confuse optimizers. */
9314 if (bypass_code == NIL && second_code == NIL
9317 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9318 gen_rtx_LABEL_REF (VOIDmode, label),
9323 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9324 ix86_compare_op0, ix86_compare_op1);
9325 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9326 gen_rtx_LABEL_REF (VOIDmode, label),
9328 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9330 use_fcomi = ix86_use_fcomi_compare (code);
9331 vec = rtvec_alloc (3 + !use_fcomi);
9332 RTVEC_ELT (vec, 0) = tmp;
9334 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9336 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9339 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9341 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9349 /* Expand DImode branch into multiple compare+branch. */
9351 rtx lo[2], hi[2], label2;
9352 enum rtx_code code1, code2, code3;
9354 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9356 tmp = ix86_compare_op0;
9357 ix86_compare_op0 = ix86_compare_op1;
9358 ix86_compare_op1 = tmp;
9359 code = swap_condition (code);
9361 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9362 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9364 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9365 avoid two branches. This costs one extra insn, so disable when
9366 optimizing for size. */
9368 if ((code == EQ || code == NE)
9370 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9375 if (hi[1] != const0_rtx)
9376 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9377 NULL_RTX, 0, OPTAB_WIDEN);
9380 if (lo[1] != const0_rtx)
9381 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9382 NULL_RTX, 0, OPTAB_WIDEN);
9384 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9385 NULL_RTX, 0, OPTAB_WIDEN);
9387 ix86_compare_op0 = tmp;
9388 ix86_compare_op1 = const0_rtx;
9389 ix86_expand_branch (code, label);
9393 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9394 op1 is a constant and the low word is zero, then we can just
9395 examine the high word. */
9397 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9400 case LT: case LTU: case GE: case GEU:
9401 ix86_compare_op0 = hi[0];
9402 ix86_compare_op1 = hi[1];
9403 ix86_expand_branch (code, label);
9409 /* Otherwise, we need two or three jumps. */
9411 label2 = gen_label_rtx ();
9414 code2 = swap_condition (code);
9415 code3 = unsigned_condition (code);
9419 case LT: case GT: case LTU: case GTU:
9422 case LE: code1 = LT; code2 = GT; break;
9423 case GE: code1 = GT; code2 = LT; break;
9424 case LEU: code1 = LTU; code2 = GTU; break;
9425 case GEU: code1 = GTU; code2 = LTU; break;
9427 case EQ: code1 = NIL; code2 = NE; break;
9428 case NE: code2 = NIL; break;
9436 * if (hi(a) < hi(b)) goto true;
9437 * if (hi(a) > hi(b)) goto false;
9438 * if (lo(a) < lo(b)) goto true;
9442 ix86_compare_op0 = hi[0];
9443 ix86_compare_op1 = hi[1];
9446 ix86_expand_branch (code1, label);
9448 ix86_expand_branch (code2, label2);
9450 ix86_compare_op0 = lo[0];
9451 ix86_compare_op1 = lo[1];
9452 ix86_expand_branch (code3, label);
9455 emit_label (label2);
9464 /* Split branch based on floating point condition. */
9466 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9467 rtx target1, rtx target2, rtx tmp)
9470 rtx label = NULL_RTX;
9472 int bypass_probability = -1, second_probability = -1, probability = -1;
9475 if (target2 != pc_rtx)
9478 code = reverse_condition_maybe_unordered (code);
9483 condition = ix86_expand_fp_compare (code, op1, op2,
9484 tmp, &second, &bypass);
9486 if (split_branch_probability >= 0)
9488 /* Distribute the probabilities across the jumps.
9489 Assume the BYPASS and SECOND to be always test
9491 probability = split_branch_probability;
9493 /* Value of 1 is low enough to make no need for probability
9494 to be updated. Later we may run some experiments and see
9495 if unordered values are more frequent in practice. */
9497 bypass_probability = 1;
9499 second_probability = 1;
9501 if (bypass != NULL_RTX)
9503 label = gen_label_rtx ();
9504 i = emit_jump_insn (gen_rtx_SET
9506 gen_rtx_IF_THEN_ELSE (VOIDmode,
9508 gen_rtx_LABEL_REF (VOIDmode,
9511 if (bypass_probability >= 0)
9513 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9514 GEN_INT (bypass_probability),
9517 i = emit_jump_insn (gen_rtx_SET
9519 gen_rtx_IF_THEN_ELSE (VOIDmode,
9520 condition, target1, target2)));
9521 if (probability >= 0)
9523 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9524 GEN_INT (probability),
9526 if (second != NULL_RTX)
9528 i = emit_jump_insn (gen_rtx_SET
9530 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9532 if (second_probability >= 0)
9534 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9535 GEN_INT (second_probability),
9538 if (label != NULL_RTX)
9543 ix86_expand_setcc (enum rtx_code code, rtx dest)
9545 rtx ret, tmp, tmpreg, equiv;
9546 rtx second_test, bypass_test;
9548 if (GET_MODE (ix86_compare_op0) == DImode
9550 return 0; /* FAIL */
9552 if (GET_MODE (dest) != QImode)
9555 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9556 PUT_MODE (ret, QImode);
9561 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9562 if (bypass_test || second_test)
9564 rtx test = second_test;
9566 rtx tmp2 = gen_reg_rtx (QImode);
9573 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9575 PUT_MODE (test, QImode);
9576 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9579 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9581 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9584 /* Attach a REG_EQUAL note describing the comparison result. */
9585 equiv = simplify_gen_relational (code, QImode,
9586 GET_MODE (ix86_compare_op0),
9587 ix86_compare_op0, ix86_compare_op1);
9588 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9590 return 1; /* DONE */
9593 /* Expand comparison setting or clearing carry flag. Return true when
9594 successful and set pop for the operation. */
9596 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9598 enum machine_mode mode =
9599 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9601 /* Do not handle DImode compares that go trought special path. Also we can't
9602 deal with FP compares yet. This is possible to add. */
9603 if ((mode == DImode && !TARGET_64BIT))
9605 if (FLOAT_MODE_P (mode))
9607 rtx second_test = NULL, bypass_test = NULL;
9608 rtx compare_op, compare_seq;
9610 /* Shortcut: following common codes never translate into carry flag compares. */
9611 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9612 || code == ORDERED || code == UNORDERED)
9615 /* These comparisons require zero flag; swap operands so they won't. */
9616 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9622 code = swap_condition (code);
9625 /* Try to expand the comparison and verify that we end up with carry flag
9626 based comparison. This is fails to be true only when we decide to expand
9627 comparison using arithmetic that is not too common scenario. */
9629 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9630 &second_test, &bypass_test);
9631 compare_seq = get_insns ();
9634 if (second_test || bypass_test)
9636 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9637 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9638 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9640 code = GET_CODE (compare_op);
9641 if (code != LTU && code != GEU)
9643 emit_insn (compare_seq);
9647 if (!INTEGRAL_MODE_P (mode))
9655 /* Convert a==0 into (unsigned)a<1. */
9658 if (op1 != const0_rtx)
9661 code = (code == EQ ? LTU : GEU);
9664 /* Convert a>b into b<a or a>=b-1. */
9667 if (GET_CODE (op1) == CONST_INT)
9669 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9670 /* Bail out on overflow. We still can swap operands but that
9671 would force loading of the constant into register. */
9672 if (op1 == const0_rtx
9673 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9675 code = (code == GTU ? GEU : LTU);
9682 code = (code == GTU ? LTU : GEU);
9686 /* Convert a>=0 into (unsigned)a<0x80000000. */
9689 if (mode == DImode || op1 != const0_rtx)
9691 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9692 code = (code == LT ? GEU : LTU);
9696 if (mode == DImode || op1 != constm1_rtx)
9698 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9699 code = (code == LE ? GEU : LTU);
9705 /* Swapping operands may cause constant to appear as first operand. */
9706 if (!nonimmediate_operand (op0, VOIDmode))
9710 op0 = force_reg (mode, op0);
9712 ix86_compare_op0 = op0;
9713 ix86_compare_op1 = op1;
9714 *pop = ix86_expand_compare (code, NULL, NULL);
9715 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9721 ix86_expand_int_movcc (rtx operands[])
9723 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9724 rtx compare_seq, compare_op;
9725 rtx second_test, bypass_test;
9726 enum machine_mode mode = GET_MODE (operands[0]);
9727 bool sign_bit_compare_p = false;;
9730 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9731 compare_seq = get_insns ();
9734 compare_code = GET_CODE (compare_op);
9736 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9737 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9738 sign_bit_compare_p = true;
9740 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9741 HImode insns, we'd be swallowed in word prefix ops. */
9743 if ((mode != HImode || TARGET_FAST_PREFIX)
9744 && (mode != DImode || TARGET_64BIT)
9745 && GET_CODE (operands[2]) == CONST_INT
9746 && GET_CODE (operands[3]) == CONST_INT)
9748 rtx out = operands[0];
9749 HOST_WIDE_INT ct = INTVAL (operands[2]);
9750 HOST_WIDE_INT cf = INTVAL (operands[3]);
9754 /* Sign bit compares are better done using shifts than we do by using
9756 if (sign_bit_compare_p
9757 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9758 ix86_compare_op1, &compare_op))
9760 /* Detect overlap between destination and compare sources. */
9763 if (!sign_bit_compare_p)
9767 compare_code = GET_CODE (compare_op);
9769 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9770 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9773 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9776 /* To simplify rest of code, restrict to the GEU case. */
9777 if (compare_code == LTU)
9779 HOST_WIDE_INT tmp = ct;
9782 compare_code = reverse_condition (compare_code);
9783 code = reverse_condition (code);
9788 PUT_CODE (compare_op,
9789 reverse_condition_maybe_unordered
9790 (GET_CODE (compare_op)));
9792 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9796 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9797 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9798 tmp = gen_reg_rtx (mode);
9801 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9803 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9807 if (code == GT || code == GE)
9808 code = reverse_condition (code);
9811 HOST_WIDE_INT tmp = ct;
9816 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9817 ix86_compare_op1, VOIDmode, 0, -1);
9830 tmp = expand_simple_binop (mode, PLUS,
9832 copy_rtx (tmp), 1, OPTAB_DIRECT);
9843 tmp = expand_simple_binop (mode, IOR,
9845 copy_rtx (tmp), 1, OPTAB_DIRECT);
9847 else if (diff == -1 && ct)
9857 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9859 tmp = expand_simple_binop (mode, PLUS,
9860 copy_rtx (tmp), GEN_INT (cf),
9861 copy_rtx (tmp), 1, OPTAB_DIRECT);
9869 * andl cf - ct, dest
9879 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9882 tmp = expand_simple_binop (mode, AND,
9884 gen_int_mode (cf - ct, mode),
9885 copy_rtx (tmp), 1, OPTAB_DIRECT);
9887 tmp = expand_simple_binop (mode, PLUS,
9888 copy_rtx (tmp), GEN_INT (ct),
9889 copy_rtx (tmp), 1, OPTAB_DIRECT);
9892 if (!rtx_equal_p (tmp, out))
9893 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9895 return 1; /* DONE */
9901 tmp = ct, ct = cf, cf = tmp;
9903 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9905 /* We may be reversing unordered compare to normal compare, that
9906 is not valid in general (we may convert non-trapping condition
9907 to trapping one), however on i386 we currently emit all
9908 comparisons unordered. */
9909 compare_code = reverse_condition_maybe_unordered (compare_code);
9910 code = reverse_condition_maybe_unordered (code);
9914 compare_code = reverse_condition (compare_code);
9915 code = reverse_condition (code);
9920 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9921 && GET_CODE (ix86_compare_op1) == CONST_INT)
9923 if (ix86_compare_op1 == const0_rtx
9924 && (code == LT || code == GE))
9925 compare_code = code;
9926 else if (ix86_compare_op1 == constm1_rtx)
9930 else if (code == GT)
9935 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9936 if (compare_code != NIL
9937 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9938 && (cf == -1 || ct == -1))
9940 /* If lea code below could be used, only optimize
9941 if it results in a 2 insn sequence. */
9943 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9944 || diff == 3 || diff == 5 || diff == 9)
9945 || (compare_code == LT && ct == -1)
9946 || (compare_code == GE && cf == -1))
9949 * notl op1 (if necessary)
9957 code = reverse_condition (code);
9960 out = emit_store_flag (out, code, ix86_compare_op0,
9961 ix86_compare_op1, VOIDmode, 0, -1);
9963 out = expand_simple_binop (mode, IOR,
9965 out, 1, OPTAB_DIRECT);
9966 if (out != operands[0])
9967 emit_move_insn (operands[0], out);
9969 return 1; /* DONE */
9974 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9975 || diff == 3 || diff == 5 || diff == 9)
9976 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9977 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9983 * lea cf(dest*(ct-cf)),dest
9987 * This also catches the degenerate setcc-only case.
9993 out = emit_store_flag (out, code, ix86_compare_op0,
9994 ix86_compare_op1, VOIDmode, 0, 1);
9997 /* On x86_64 the lea instruction operates on Pmode, so we need
9998 to get arithmetics done in proper mode to match. */
10000 tmp = copy_rtx (out);
10004 out1 = copy_rtx (out);
10005 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10009 tmp = gen_rtx_PLUS (mode, tmp, out1);
10015 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10018 if (!rtx_equal_p (tmp, out))
10021 out = force_operand (tmp, copy_rtx (out));
10023 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10025 if (!rtx_equal_p (out, operands[0]))
10026 emit_move_insn (operands[0], copy_rtx (out));
10028 return 1; /* DONE */
10032 * General case: Jumpful:
10033 * xorl dest,dest cmpl op1, op2
10034 * cmpl op1, op2 movl ct, dest
10035 * setcc dest jcc 1f
10036 * decl dest movl cf, dest
10037 * andl (cf-ct),dest 1:
10040 * Size 20. Size 14.
10042 * This is reasonably steep, but branch mispredict costs are
10043 * high on modern cpus, so consider failing only if optimizing
10047 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10048 && BRANCH_COST >= 2)
10054 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10055 /* We may be reversing unordered compare to normal compare,
10056 that is not valid in general (we may convert non-trapping
10057 condition to trapping one), however on i386 we currently
10058 emit all comparisons unordered. */
10059 code = reverse_condition_maybe_unordered (code);
10062 code = reverse_condition (code);
10063 if (compare_code != NIL)
10064 compare_code = reverse_condition (compare_code);
10068 if (compare_code != NIL)
10070 /* notl op1 (if needed)
10075 For x < 0 (resp. x <= -1) there will be no notl,
10076 so if possible swap the constants to get rid of the
10078 True/false will be -1/0 while code below (store flag
10079 followed by decrement) is 0/-1, so the constants need
10080 to be exchanged once more. */
10082 if (compare_code == GE || !cf)
10084 code = reverse_condition (code);
10089 HOST_WIDE_INT tmp = cf;
10094 out = emit_store_flag (out, code, ix86_compare_op0,
10095 ix86_compare_op1, VOIDmode, 0, -1);
10099 out = emit_store_flag (out, code, ix86_compare_op0,
10100 ix86_compare_op1, VOIDmode, 0, 1);
10102 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10103 copy_rtx (out), 1, OPTAB_DIRECT);
10106 out = expand_simple_binop (mode, AND, copy_rtx (out),
10107 gen_int_mode (cf - ct, mode),
10108 copy_rtx (out), 1, OPTAB_DIRECT);
10110 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10111 copy_rtx (out), 1, OPTAB_DIRECT);
10112 if (!rtx_equal_p (out, operands[0]))
10113 emit_move_insn (operands[0], copy_rtx (out));
10115 return 1; /* DONE */
10119 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10121 /* Try a few things more with specific constants and a variable. */
10124 rtx var, orig_out, out, tmp;
10126 if (BRANCH_COST <= 2)
10127 return 0; /* FAIL */
10129 /* If one of the two operands is an interesting constant, load a
10130 constant with the above and mask it in with a logical operation. */
10132 if (GET_CODE (operands[2]) == CONST_INT)
10135 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10136 operands[3] = constm1_rtx, op = and_optab;
10137 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10138 operands[3] = const0_rtx, op = ior_optab;
10140 return 0; /* FAIL */
10142 else if (GET_CODE (operands[3]) == CONST_INT)
10145 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10146 operands[2] = constm1_rtx, op = and_optab;
10147 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10148 operands[2] = const0_rtx, op = ior_optab;
10150 return 0; /* FAIL */
10153 return 0; /* FAIL */
10155 orig_out = operands[0];
10156 tmp = gen_reg_rtx (mode);
10159 /* Recurse to get the constant loaded. */
10160 if (ix86_expand_int_movcc (operands) == 0)
10161 return 0; /* FAIL */
10163 /* Mask in the interesting variable. */
10164 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10166 if (!rtx_equal_p (out, orig_out))
10167 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10169 return 1; /* DONE */
10173 * For comparison with above,
10183 if (! nonimmediate_operand (operands[2], mode))
10184 operands[2] = force_reg (mode, operands[2]);
10185 if (! nonimmediate_operand (operands[3], mode))
10186 operands[3] = force_reg (mode, operands[3]);
10188 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10190 rtx tmp = gen_reg_rtx (mode);
10191 emit_move_insn (tmp, operands[3]);
10194 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10196 rtx tmp = gen_reg_rtx (mode);
10197 emit_move_insn (tmp, operands[2]);
10201 if (! register_operand (operands[2], VOIDmode)
10203 || ! register_operand (operands[3], VOIDmode)))
10204 operands[2] = force_reg (mode, operands[2]);
10207 && ! register_operand (operands[3], VOIDmode))
10208 operands[3] = force_reg (mode, operands[3]);
10210 emit_insn (compare_seq);
10211 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10212 gen_rtx_IF_THEN_ELSE (mode,
10213 compare_op, operands[2],
10216 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10217 gen_rtx_IF_THEN_ELSE (mode,
10219 copy_rtx (operands[3]),
10220 copy_rtx (operands[0]))));
10222 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10223 gen_rtx_IF_THEN_ELSE (mode,
10225 copy_rtx (operands[2]),
10226 copy_rtx (operands[0]))));
10228 return 1; /* DONE */
10232 ix86_expand_fp_movcc (rtx operands[])
10234 enum rtx_code code;
10236 rtx compare_op, second_test, bypass_test;
10238 /* For SF/DFmode conditional moves based on comparisons
10239 in same mode, we may want to use SSE min/max instructions. */
10240 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10241 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10242 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10243 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10244 && (!TARGET_IEEE_FP
10245 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10246 /* We may be called from the post-reload splitter. */
10247 && (!REG_P (operands[0])
10248 || SSE_REG_P (operands[0])
10249 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10251 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10252 code = GET_CODE (operands[1]);
10254 /* See if we have (cross) match between comparison operands and
10255 conditional move operands. */
10256 if (rtx_equal_p (operands[2], op1))
10261 code = reverse_condition_maybe_unordered (code);
10263 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10265 /* Check for min operation. */
10266 if (code == LT || code == UNLE)
10274 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10275 if (memory_operand (op0, VOIDmode))
10276 op0 = force_reg (GET_MODE (operands[0]), op0);
10277 if (GET_MODE (operands[0]) == SFmode)
10278 emit_insn (gen_minsf3 (operands[0], op0, op1));
10280 emit_insn (gen_mindf3 (operands[0], op0, op1));
10283 /* Check for max operation. */
10284 if (code == GT || code == UNGE)
10292 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10293 if (memory_operand (op0, VOIDmode))
10294 op0 = force_reg (GET_MODE (operands[0]), op0);
10295 if (GET_MODE (operands[0]) == SFmode)
10296 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10298 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10302 /* Manage condition to be sse_comparison_operator. In case we are
10303 in non-ieee mode, try to canonicalize the destination operand
10304 to be first in the comparison - this helps reload to avoid extra
10306 if (!sse_comparison_operator (operands[1], VOIDmode)
10307 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10309 rtx tmp = ix86_compare_op0;
10310 ix86_compare_op0 = ix86_compare_op1;
10311 ix86_compare_op1 = tmp;
10312 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10313 VOIDmode, ix86_compare_op0,
10316 /* Similarly try to manage result to be first operand of conditional
10317 move. We also don't support the NE comparison on SSE, so try to
10319 if ((rtx_equal_p (operands[0], operands[3])
10320 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10321 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10323 rtx tmp = operands[2];
10324 operands[2] = operands[3];
10326 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10327 (GET_CODE (operands[1])),
10328 VOIDmode, ix86_compare_op0,
10331 if (GET_MODE (operands[0]) == SFmode)
10332 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10333 operands[2], operands[3],
10334 ix86_compare_op0, ix86_compare_op1));
10336 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10337 operands[2], operands[3],
10338 ix86_compare_op0, ix86_compare_op1));
10342 /* The floating point conditional move instructions don't directly
10343 support conditions resulting from a signed integer comparison. */
10345 code = GET_CODE (operands[1]);
10346 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10348 /* The floating point conditional move instructions don't directly
10349 support signed integer comparisons. */
10351 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10353 if (second_test != NULL || bypass_test != NULL)
10355 tmp = gen_reg_rtx (QImode);
10356 ix86_expand_setcc (code, tmp);
10358 ix86_compare_op0 = tmp;
10359 ix86_compare_op1 = const0_rtx;
10360 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10362 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10364 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10365 emit_move_insn (tmp, operands[3]);
10368 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10370 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10371 emit_move_insn (tmp, operands[2]);
10375 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10376 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10381 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10382 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10387 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10388 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10396 /* Expand conditional increment or decrement using adb/sbb instructions.
10397 The default case using setcc followed by the conditional move can be
10398 done by generic code. */
10400 ix86_expand_int_addcc (rtx operands[])
10402 enum rtx_code code = GET_CODE (operands[1]);
10404 rtx val = const0_rtx;
10405 bool fpcmp = false;
10406 enum machine_mode mode = GET_MODE (operands[0]);
10408 if (operands[3] != const1_rtx
10409 && operands[3] != constm1_rtx)
10411 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10412 ix86_compare_op1, &compare_op))
10414 code = GET_CODE (compare_op);
10416 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10417 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10420 code = ix86_fp_compare_code_to_integer (code);
10427 PUT_CODE (compare_op,
10428 reverse_condition_maybe_unordered
10429 (GET_CODE (compare_op)));
10431 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10433 PUT_MODE (compare_op, mode);
10435 /* Construct either adc or sbb insn. */
10436 if ((code == LTU) == (operands[3] == constm1_rtx))
10438 switch (GET_MODE (operands[0]))
10441 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10444 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10447 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10450 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10458 switch (GET_MODE (operands[0]))
10461 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10464 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10467 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10470 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10476 return 1; /* DONE */
10480 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10481 works for floating pointer parameters and nonoffsetable memories.
10482 For pushes, it returns just stack offsets; the values will be saved
10483 in the right order. Maximally three parts are generated. */
10486 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10491 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10493 size = (GET_MODE_SIZE (mode) + 4) / 8;
10495 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10497 if (size < 2 || size > 3)
10500 /* Optimize constant pool reference to immediates. This is used by fp
10501 moves, that force all constants to memory to allow combining. */
10502 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10504 rtx tmp = maybe_get_pool_constant (operand);
10509 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10511 /* The only non-offsetable memories we handle are pushes. */
10512 if (! push_operand (operand, VOIDmode))
10515 operand = copy_rtx (operand);
10516 PUT_MODE (operand, Pmode);
10517 parts[0] = parts[1] = parts[2] = operand;
10519 else if (!TARGET_64BIT)
10521 if (mode == DImode)
10522 split_di (&operand, 1, &parts[0], &parts[1]);
10525 if (REG_P (operand))
10527 if (!reload_completed)
10529 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10530 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10532 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10534 else if (offsettable_memref_p (operand))
10536 operand = adjust_address (operand, SImode, 0);
10537 parts[0] = operand;
10538 parts[1] = adjust_address (operand, SImode, 4);
10540 parts[2] = adjust_address (operand, SImode, 8);
10542 else if (GET_CODE (operand) == CONST_DOUBLE)
10547 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10551 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10552 parts[2] = gen_int_mode (l[2], SImode);
10555 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10560 parts[1] = gen_int_mode (l[1], SImode);
10561 parts[0] = gen_int_mode (l[0], SImode);
10569 if (mode == TImode)
10570 split_ti (&operand, 1, &parts[0], &parts[1]);
10571 if (mode == XFmode || mode == TFmode)
10573 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10574 if (REG_P (operand))
10576 if (!reload_completed)
10578 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10579 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10581 else if (offsettable_memref_p (operand))
10583 operand = adjust_address (operand, DImode, 0);
10584 parts[0] = operand;
10585 parts[1] = adjust_address (operand, upper_mode, 8);
10587 else if (GET_CODE (operand) == CONST_DOUBLE)
10592 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10593 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10594 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10595 if (HOST_BITS_PER_WIDE_INT >= 64)
10598 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10599 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10602 parts[0] = immed_double_const (l[0], l[1], DImode);
10603 if (upper_mode == SImode)
10604 parts[1] = gen_int_mode (l[2], SImode);
10605 else if (HOST_BITS_PER_WIDE_INT >= 64)
10608 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10609 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10612 parts[1] = immed_double_const (l[2], l[3], DImode);
10622 /* Emit insns to perform a move or push of DI, DF, and XF values.
10623 Return false when normal moves are needed; true when all required
10624 insns have been emitted. Operands 2-4 contain the input values
10625 int the correct order; operands 5-7 contain the output values. */
10628 ix86_split_long_move (rtx operands[])
10633 int collisions = 0;
10634 enum machine_mode mode = GET_MODE (operands[0]);
10636 /* The DFmode expanders may ask us to move double.
10637 For 64bit target this is single move. By hiding the fact
10638 here we simplify i386.md splitters. */
10639 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10641 /* Optimize constant pool reference to immediates. This is used by
10642 fp moves, that force all constants to memory to allow combining. */
10644 if (GET_CODE (operands[1]) == MEM
10645 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10646 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10647 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10648 if (push_operand (operands[0], VOIDmode))
10650 operands[0] = copy_rtx (operands[0]);
10651 PUT_MODE (operands[0], Pmode);
10654 operands[0] = gen_lowpart (DImode, operands[0]);
10655 operands[1] = gen_lowpart (DImode, operands[1]);
10656 emit_move_insn (operands[0], operands[1]);
10660 /* The only non-offsettable memory we handle is push. */
10661 if (push_operand (operands[0], VOIDmode))
10663 else if (GET_CODE (operands[0]) == MEM
10664 && ! offsettable_memref_p (operands[0]))
10667 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10668 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10670 /* When emitting push, take care for source operands on the stack. */
10671 if (push && GET_CODE (operands[1]) == MEM
10672 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10675 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10676 XEXP (part[1][2], 0));
10677 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10678 XEXP (part[1][1], 0));
10681 /* We need to do copy in the right order in case an address register
10682 of the source overlaps the destination. */
10683 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10685 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10687 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10690 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10693 /* Collision in the middle part can be handled by reordering. */
10694 if (collisions == 1 && nparts == 3
10695 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10698 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10699 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10702 /* If there are more collisions, we can't handle it by reordering.
10703 Do an lea to the last part and use only one colliding move. */
10704 else if (collisions > 1)
10710 base = part[0][nparts - 1];
10712 /* Handle the case when the last part isn't valid for lea.
10713 Happens in 64-bit mode storing the 12-byte XFmode. */
10714 if (GET_MODE (base) != Pmode)
10715 base = gen_rtx_REG (Pmode, REGNO (base));
10717 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10718 part[1][0] = replace_equiv_address (part[1][0], base);
10719 part[1][1] = replace_equiv_address (part[1][1],
10720 plus_constant (base, UNITS_PER_WORD));
10722 part[1][2] = replace_equiv_address (part[1][2],
10723 plus_constant (base, 8));
10733 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10734 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10735 emit_move_insn (part[0][2], part[1][2]);
10740 /* In 64bit mode we don't have 32bit push available. In case this is
10741 register, it is OK - we will just use larger counterpart. We also
10742 retype memory - these comes from attempt to avoid REX prefix on
10743 moving of second half of TFmode value. */
10744 if (GET_MODE (part[1][1]) == SImode)
10746 if (GET_CODE (part[1][1]) == MEM)
10747 part[1][1] = adjust_address (part[1][1], DImode, 0);
10748 else if (REG_P (part[1][1]))
10749 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10752 if (GET_MODE (part[1][0]) == SImode)
10753 part[1][0] = part[1][1];
10756 emit_move_insn (part[0][1], part[1][1]);
10757 emit_move_insn (part[0][0], part[1][0]);
10761 /* Choose correct order to not overwrite the source before it is copied. */
10762 if ((REG_P (part[0][0])
10763 && REG_P (part[1][1])
10764 && (REGNO (part[0][0]) == REGNO (part[1][1])
10766 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10768 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10772 operands[2] = part[0][2];
10773 operands[3] = part[0][1];
10774 operands[4] = part[0][0];
10775 operands[5] = part[1][2];
10776 operands[6] = part[1][1];
10777 operands[7] = part[1][0];
10781 operands[2] = part[0][1];
10782 operands[3] = part[0][0];
10783 operands[5] = part[1][1];
10784 operands[6] = part[1][0];
10791 operands[2] = part[0][0];
10792 operands[3] = part[0][1];
10793 operands[4] = part[0][2];
10794 operands[5] = part[1][0];
10795 operands[6] = part[1][1];
10796 operands[7] = part[1][2];
10800 operands[2] = part[0][0];
10801 operands[3] = part[0][1];
10802 operands[5] = part[1][0];
10803 operands[6] = part[1][1];
10806 emit_move_insn (operands[2], operands[5]);
10807 emit_move_insn (operands[3], operands[6]);
10809 emit_move_insn (operands[4], operands[7]);
10815 ix86_split_ashldi (rtx *operands, rtx scratch)
10817 rtx low[2], high[2];
10820 if (GET_CODE (operands[2]) == CONST_INT)
10822 split_di (operands, 2, low, high);
10823 count = INTVAL (operands[2]) & 63;
10827 emit_move_insn (high[0], low[1]);
10828 emit_move_insn (low[0], const0_rtx);
10831 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10835 if (!rtx_equal_p (operands[0], operands[1]))
10836 emit_move_insn (operands[0], operands[1]);
10837 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10838 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10843 if (!rtx_equal_p (operands[0], operands[1]))
10844 emit_move_insn (operands[0], operands[1]);
10846 split_di (operands, 1, low, high);
10848 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10849 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10851 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10853 if (! no_new_pseudos)
10854 scratch = force_reg (SImode, const0_rtx);
10856 emit_move_insn (scratch, const0_rtx);
10858 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10862 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10867 ix86_split_ashrdi (rtx *operands, rtx scratch)
10869 rtx low[2], high[2];
10872 if (GET_CODE (operands[2]) == CONST_INT)
10874 split_di (operands, 2, low, high);
10875 count = INTVAL (operands[2]) & 63;
10879 emit_move_insn (low[0], high[1]);
10881 if (! reload_completed)
10882 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10885 emit_move_insn (high[0], low[0]);
10886 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10890 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10894 if (!rtx_equal_p (operands[0], operands[1]))
10895 emit_move_insn (operands[0], operands[1]);
10896 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10897 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10902 if (!rtx_equal_p (operands[0], operands[1]))
10903 emit_move_insn (operands[0], operands[1]);
10905 split_di (operands, 1, low, high);
10907 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10908 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10910 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10912 if (! no_new_pseudos)
10913 scratch = gen_reg_rtx (SImode);
10914 emit_move_insn (scratch, high[0]);
10915 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10916 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10920 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10925 ix86_split_lshrdi (rtx *operands, rtx scratch)
10927 rtx low[2], high[2];
10930 if (GET_CODE (operands[2]) == CONST_INT)
10932 split_di (operands, 2, low, high);
10933 count = INTVAL (operands[2]) & 63;
10937 emit_move_insn (low[0], high[1]);
10938 emit_move_insn (high[0], const0_rtx);
10941 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10945 if (!rtx_equal_p (operands[0], operands[1]))
10946 emit_move_insn (operands[0], operands[1]);
10947 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10948 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10953 if (!rtx_equal_p (operands[0], operands[1]))
10954 emit_move_insn (operands[0], operands[1]);
10956 split_di (operands, 1, low, high);
10958 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10959 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10961 /* Heh. By reversing the arguments, we can reuse this pattern. */
10962 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10964 if (! no_new_pseudos)
10965 scratch = force_reg (SImode, const0_rtx);
10967 emit_move_insn (scratch, const0_rtx);
10969 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10973 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10977 /* Helper function for the string operations below. Dest VARIABLE whether
10978 it is aligned to VALUE bytes. If true, jump to the label. */
10980 ix86_expand_aligntest (rtx variable, int value)
10982 rtx label = gen_label_rtx ();
10983 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10984 if (GET_MODE (variable) == DImode)
10985 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10987 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10988 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10993 /* Adjust COUNTER by the VALUE. */
10995 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10997 if (GET_MODE (countreg) == DImode)
10998 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11000 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11003 /* Zero extend possibly SImode EXP to Pmode register. */
11005 ix86_zero_extend_to_Pmode (rtx exp)
11008 if (GET_MODE (exp) == VOIDmode)
11009 return force_reg (Pmode, exp);
11010 if (GET_MODE (exp) == Pmode)
11011 return copy_to_mode_reg (Pmode, exp);
11012 r = gen_reg_rtx (Pmode);
11013 emit_insn (gen_zero_extendsidi2 (r, exp));
11017 /* Expand string move (memcpy) operation. Use i386 string operations when
11018 profitable. expand_clrstr contains similar code. */
11020 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11022 rtx srcreg, destreg, countreg, srcexp, destexp;
11023 enum machine_mode counter_mode;
11024 HOST_WIDE_INT align = 0;
11025 unsigned HOST_WIDE_INT count = 0;
11027 if (GET_CODE (align_exp) == CONST_INT)
11028 align = INTVAL (align_exp);
11030 /* Can't use any of this if the user has appropriated esi or edi. */
11031 if (global_regs[4] || global_regs[5])
11034 /* This simple hack avoids all inlining code and simplifies code below. */
11035 if (!TARGET_ALIGN_STRINGOPS)
11038 if (GET_CODE (count_exp) == CONST_INT)
11040 count = INTVAL (count_exp);
11041 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11045 /* Figure out proper mode for counter. For 32bits it is always SImode,
11046 for 64bits use SImode when possible, otherwise DImode.
11047 Set count to number of bytes copied when known at compile time. */
11048 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11049 || x86_64_zero_extended_value (count_exp))
11050 counter_mode = SImode;
11052 counter_mode = DImode;
11054 if (counter_mode != SImode && counter_mode != DImode)
11057 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11058 if (destreg != XEXP (dst, 0))
11059 dst = replace_equiv_address_nv (dst, destreg);
11060 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11061 if (srcreg != XEXP (src, 0))
11062 src = replace_equiv_address_nv (src, srcreg);
11064 /* When optimizing for size emit simple rep ; movsb instruction for
11065 counts not divisible by 4. */
11067 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11069 emit_insn (gen_cld ());
11070 countreg = ix86_zero_extend_to_Pmode (count_exp);
11071 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11072 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11073 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11077 /* For constant aligned (or small unaligned) copies use rep movsl
11078 followed by code copying the rest. For PentiumPro ensure 8 byte
11079 alignment to allow rep movsl acceleration. */
11081 else if (count != 0
11083 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11084 || optimize_size || count < (unsigned int) 64))
11086 unsigned HOST_WIDE_INT offset = 0;
11087 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11088 rtx srcmem, dstmem;
11090 emit_insn (gen_cld ());
11091 if (count & ~(size - 1))
11093 countreg = copy_to_mode_reg (counter_mode,
11094 GEN_INT ((count >> (size == 4 ? 2 : 3))
11095 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11096 countreg = ix86_zero_extend_to_Pmode (countreg);
11098 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11099 GEN_INT (size == 4 ? 2 : 3));
11100 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11101 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11103 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11104 countreg, destexp, srcexp));
11105 offset = count & ~(size - 1);
11107 if (size == 8 && (count & 0x04))
11109 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11111 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11113 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11118 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11120 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11122 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11127 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11129 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11131 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11134 /* The generic code based on the glibc implementation:
11135 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11136 allowing accelerated copying there)
11137 - copy the data using rep movsl
11138 - copy the rest. */
11143 rtx srcmem, dstmem;
11144 int desired_alignment = (TARGET_PENTIUMPRO
11145 && (count == 0 || count >= (unsigned int) 260)
11146 ? 8 : UNITS_PER_WORD);
11147 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11148 dst = change_address (dst, BLKmode, destreg);
11149 src = change_address (src, BLKmode, srcreg);
11151 /* In case we don't know anything about the alignment, default to
11152 library version, since it is usually equally fast and result in
11155 Also emit call when we know that the count is large and call overhead
11156 will not be important. */
11157 if (!TARGET_INLINE_ALL_STRINGOPS
11158 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11161 if (TARGET_SINGLE_STRINGOP)
11162 emit_insn (gen_cld ());
11164 countreg2 = gen_reg_rtx (Pmode);
11165 countreg = copy_to_mode_reg (counter_mode, count_exp);
11167 /* We don't use loops to align destination and to copy parts smaller
11168 than 4 bytes, because gcc is able to optimize such code better (in
11169 the case the destination or the count really is aligned, gcc is often
11170 able to predict the branches) and also it is friendlier to the
11171 hardware branch prediction.
11173 Using loops is beneficial for generic case, because we can
11174 handle small counts using the loops. Many CPUs (such as Athlon)
11175 have large REP prefix setup costs.
11177 This is quite costly. Maybe we can revisit this decision later or
11178 add some customizability to this code. */
11180 if (count == 0 && align < desired_alignment)
11182 label = gen_label_rtx ();
11183 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11184 LEU, 0, counter_mode, 1, label);
11188 rtx label = ix86_expand_aligntest (destreg, 1);
11189 srcmem = change_address (src, QImode, srcreg);
11190 dstmem = change_address (dst, QImode, destreg);
11191 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11192 ix86_adjust_counter (countreg, 1);
11193 emit_label (label);
11194 LABEL_NUSES (label) = 1;
11198 rtx label = ix86_expand_aligntest (destreg, 2);
11199 srcmem = change_address (src, HImode, srcreg);
11200 dstmem = change_address (dst, HImode, destreg);
11201 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11202 ix86_adjust_counter (countreg, 2);
11203 emit_label (label);
11204 LABEL_NUSES (label) = 1;
11206 if (align <= 4 && desired_alignment > 4)
11208 rtx label = ix86_expand_aligntest (destreg, 4);
11209 srcmem = change_address (src, SImode, srcreg);
11210 dstmem = change_address (dst, SImode, destreg);
11211 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11212 ix86_adjust_counter (countreg, 4);
11213 emit_label (label);
11214 LABEL_NUSES (label) = 1;
11217 if (label && desired_alignment > 4 && !TARGET_64BIT)
11219 emit_label (label);
11220 LABEL_NUSES (label) = 1;
11223 if (!TARGET_SINGLE_STRINGOP)
11224 emit_insn (gen_cld ());
11227 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11229 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11233 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11234 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11236 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11237 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11238 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11239 countreg2, destexp, srcexp));
11243 emit_label (label);
11244 LABEL_NUSES (label) = 1;
11246 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11248 srcmem = change_address (src, SImode, srcreg);
11249 dstmem = change_address (dst, SImode, destreg);
11250 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11252 if ((align <= 4 || count == 0) && TARGET_64BIT)
11254 rtx label = ix86_expand_aligntest (countreg, 4);
11255 srcmem = change_address (src, SImode, srcreg);
11256 dstmem = change_address (dst, SImode, destreg);
11257 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11258 emit_label (label);
11259 LABEL_NUSES (label) = 1;
11261 if (align > 2 && count != 0 && (count & 2))
11263 srcmem = change_address (src, HImode, srcreg);
11264 dstmem = change_address (dst, HImode, destreg);
11265 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11267 if (align <= 2 || count == 0)
11269 rtx label = ix86_expand_aligntest (countreg, 2);
11270 srcmem = change_address (src, HImode, srcreg);
11271 dstmem = change_address (dst, HImode, destreg);
11272 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11273 emit_label (label);
11274 LABEL_NUSES (label) = 1;
11276 if (align > 1 && count != 0 && (count & 1))
11278 srcmem = change_address (src, QImode, srcreg);
11279 dstmem = change_address (dst, QImode, destreg);
11280 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11282 if (align <= 1 || count == 0)
11284 rtx label = ix86_expand_aligntest (countreg, 1);
11285 srcmem = change_address (src, QImode, srcreg);
11286 dstmem = change_address (dst, QImode, destreg);
11287 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11288 emit_label (label);
11289 LABEL_NUSES (label) = 1;
11296 /* Expand string clear operation (bzero). Use i386 string operations when
11297 profitable. expand_movstr contains similar code. */
11299 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11301 rtx destreg, zeroreg, countreg, destexp;
11302 enum machine_mode counter_mode;
11303 HOST_WIDE_INT align = 0;
11304 unsigned HOST_WIDE_INT count = 0;
11306 if (GET_CODE (align_exp) == CONST_INT)
11307 align = INTVAL (align_exp);
11309 /* Can't use any of this if the user has appropriated esi. */
11310 if (global_regs[4])
11313 /* This simple hack avoids all inlining code and simplifies code below. */
11314 if (!TARGET_ALIGN_STRINGOPS)
11317 if (GET_CODE (count_exp) == CONST_INT)
11319 count = INTVAL (count_exp);
11320 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11323 /* Figure out proper mode for counter. For 32bits it is always SImode,
11324 for 64bits use SImode when possible, otherwise DImode.
11325 Set count to number of bytes copied when known at compile time. */
11326 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11327 || x86_64_zero_extended_value (count_exp))
11328 counter_mode = SImode;
11330 counter_mode = DImode;
11332 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11333 if (destreg != XEXP (dst, 0))
11334 dst = replace_equiv_address_nv (dst, destreg);
11336 emit_insn (gen_cld ());
11338 /* When optimizing for size emit simple rep ; movsb instruction for
11339 counts not divisible by 4. */
11341 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11343 countreg = ix86_zero_extend_to_Pmode (count_exp);
11344 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11345 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11346 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11348 else if (count != 0
11350 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11351 || optimize_size || count < (unsigned int) 64))
11353 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11354 unsigned HOST_WIDE_INT offset = 0;
11356 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11357 if (count & ~(size - 1))
11359 countreg = copy_to_mode_reg (counter_mode,
11360 GEN_INT ((count >> (size == 4 ? 2 : 3))
11361 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11362 countreg = ix86_zero_extend_to_Pmode (countreg);
11363 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11364 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11365 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11366 offset = count & ~(size - 1);
11368 if (size == 8 && (count & 0x04))
11370 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11372 emit_insn (gen_strset (destreg, mem,
11373 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11378 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11380 emit_insn (gen_strset (destreg, mem,
11381 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11386 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11388 emit_insn (gen_strset (destreg, mem,
11389 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11396 /* Compute desired alignment of the string operation. */
11397 int desired_alignment = (TARGET_PENTIUMPRO
11398 && (count == 0 || count >= (unsigned int) 260)
11399 ? 8 : UNITS_PER_WORD);
11401 /* In case we don't know anything about the alignment, default to
11402 library version, since it is usually equally fast and result in
11405 Also emit call when we know that the count is large and call overhead
11406 will not be important. */
11407 if (!TARGET_INLINE_ALL_STRINGOPS
11408 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11411 if (TARGET_SINGLE_STRINGOP)
11412 emit_insn (gen_cld ());
11414 countreg2 = gen_reg_rtx (Pmode);
11415 countreg = copy_to_mode_reg (counter_mode, count_exp);
11416 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11417 /* Get rid of MEM_OFFSET, it won't be accurate. */
11418 dst = change_address (dst, BLKmode, destreg);
11420 if (count == 0 && align < desired_alignment)
11422 label = gen_label_rtx ();
11423 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11424 LEU, 0, counter_mode, 1, label);
11428 rtx label = ix86_expand_aligntest (destreg, 1);
11429 emit_insn (gen_strset (destreg, dst,
11430 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11431 ix86_adjust_counter (countreg, 1);
11432 emit_label (label);
11433 LABEL_NUSES (label) = 1;
11437 rtx label = ix86_expand_aligntest (destreg, 2);
11438 emit_insn (gen_strset (destreg, dst,
11439 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11440 ix86_adjust_counter (countreg, 2);
11441 emit_label (label);
11442 LABEL_NUSES (label) = 1;
11444 if (align <= 4 && desired_alignment > 4)
11446 rtx label = ix86_expand_aligntest (destreg, 4);
11447 emit_insn (gen_strset (destreg, dst,
11449 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11451 ix86_adjust_counter (countreg, 4);
11452 emit_label (label);
11453 LABEL_NUSES (label) = 1;
11456 if (label && desired_alignment > 4 && !TARGET_64BIT)
11458 emit_label (label);
11459 LABEL_NUSES (label) = 1;
11463 if (!TARGET_SINGLE_STRINGOP)
11464 emit_insn (gen_cld ());
11467 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11469 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11473 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11474 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11476 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11477 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11481 emit_label (label);
11482 LABEL_NUSES (label) = 1;
11485 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11486 emit_insn (gen_strset (destreg, dst,
11487 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11488 if (TARGET_64BIT && (align <= 4 || count == 0))
11490 rtx label = ix86_expand_aligntest (countreg, 4);
11491 emit_insn (gen_strset (destreg, dst,
11492 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11493 emit_label (label);
11494 LABEL_NUSES (label) = 1;
11496 if (align > 2 && count != 0 && (count & 2))
11497 emit_insn (gen_strset (destreg, dst,
11498 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11499 if (align <= 2 || count == 0)
11501 rtx label = ix86_expand_aligntest (countreg, 2);
11502 emit_insn (gen_strset (destreg, dst,
11503 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11504 emit_label (label);
11505 LABEL_NUSES (label) = 1;
11507 if (align > 1 && count != 0 && (count & 1))
11508 emit_insn (gen_strset (destreg, dst,
11509 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11510 if (align <= 1 || count == 0)
11512 rtx label = ix86_expand_aligntest (countreg, 1);
11513 emit_insn (gen_strset (destreg, dst,
11514 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11515 emit_label (label);
11516 LABEL_NUSES (label) = 1;
11522 /* Expand strlen. */
11524 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11526 rtx addr, scratch1, scratch2, scratch3, scratch4;
11528 /* The generic case of strlen expander is long. Avoid it's
11529 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11531 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11532 && !TARGET_INLINE_ALL_STRINGOPS
11534 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11537 addr = force_reg (Pmode, XEXP (src, 0));
11538 scratch1 = gen_reg_rtx (Pmode);
11540 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11543 /* Well it seems that some optimizer does not combine a call like
11544 foo(strlen(bar), strlen(bar));
11545 when the move and the subtraction is done here. It does calculate
11546 the length just once when these instructions are done inside of
11547 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11548 often used and I use one fewer register for the lifetime of
11549 output_strlen_unroll() this is better. */
11551 emit_move_insn (out, addr);
11553 ix86_expand_strlensi_unroll_1 (out, src, align);
11555 /* strlensi_unroll_1 returns the address of the zero at the end of
11556 the string, like memchr(), so compute the length by subtracting
11557 the start address. */
11559 emit_insn (gen_subdi3 (out, out, addr));
11561 emit_insn (gen_subsi3 (out, out, addr));
11566 scratch2 = gen_reg_rtx (Pmode);
11567 scratch3 = gen_reg_rtx (Pmode);
11568 scratch4 = force_reg (Pmode, constm1_rtx);
11570 emit_move_insn (scratch3, addr);
11571 eoschar = force_reg (QImode, eoschar);
11573 emit_insn (gen_cld ());
11574 src = replace_equiv_address_nv (src, scratch3);
11576 /* If .md starts supporting :P, this can be done in .md. */
11577 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11578 scratch4), UNSPEC_SCAS);
11579 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11582 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11583 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11587 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11588 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11594 /* Expand the appropriate insns for doing strlen if not just doing
11597 out = result, initialized with the start address
11598 align_rtx = alignment of the address.
11599 scratch = scratch register, initialized with the startaddress when
11600 not aligned, otherwise undefined
11602 This is just the body. It needs the initializations mentioned above and
11603 some address computing at the end. These things are done in i386.md. */
11606 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11610 rtx align_2_label = NULL_RTX;
11611 rtx align_3_label = NULL_RTX;
11612 rtx align_4_label = gen_label_rtx ();
11613 rtx end_0_label = gen_label_rtx ();
11615 rtx tmpreg = gen_reg_rtx (SImode);
11616 rtx scratch = gen_reg_rtx (SImode);
11620 if (GET_CODE (align_rtx) == CONST_INT)
11621 align = INTVAL (align_rtx);
11623 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11625 /* Is there a known alignment and is it less than 4? */
11628 rtx scratch1 = gen_reg_rtx (Pmode);
11629 emit_move_insn (scratch1, out);
11630 /* Is there a known alignment and is it not 2? */
11633 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11634 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11636 /* Leave just the 3 lower bits. */
11637 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11638 NULL_RTX, 0, OPTAB_WIDEN);
11640 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11641 Pmode, 1, align_4_label);
11642 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11643 Pmode, 1, align_2_label);
11644 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11645 Pmode, 1, align_3_label);
11649 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11650 check if is aligned to 4 - byte. */
11652 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11653 NULL_RTX, 0, OPTAB_WIDEN);
11655 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11656 Pmode, 1, align_4_label);
11659 mem = change_address (src, QImode, out);
11661 /* Now compare the bytes. */
11663 /* Compare the first n unaligned byte on a byte per byte basis. */
11664 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11665 QImode, 1, end_0_label);
11667 /* Increment the address. */
11669 emit_insn (gen_adddi3 (out, out, const1_rtx));
11671 emit_insn (gen_addsi3 (out, out, const1_rtx));
11673 /* Not needed with an alignment of 2 */
11676 emit_label (align_2_label);
11678 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11682 emit_insn (gen_adddi3 (out, out, const1_rtx));
11684 emit_insn (gen_addsi3 (out, out, const1_rtx));
11686 emit_label (align_3_label);
11689 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11693 emit_insn (gen_adddi3 (out, out, const1_rtx));
11695 emit_insn (gen_addsi3 (out, out, const1_rtx));
11698 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11699 align this loop. It gives only huge programs, but does not help to
11701 emit_label (align_4_label);
11703 mem = change_address (src, SImode, out);
11704 emit_move_insn (scratch, mem);
11706 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11708 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11710 /* This formula yields a nonzero result iff one of the bytes is zero.
11711 This saves three branches inside loop and many cycles. */
11713 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11714 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11715 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11716 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11717 gen_int_mode (0x80808080, SImode)));
11718 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11723 rtx reg = gen_reg_rtx (SImode);
11724 rtx reg2 = gen_reg_rtx (Pmode);
11725 emit_move_insn (reg, tmpreg);
11726 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11728 /* If zero is not in the first two bytes, move two bytes forward. */
11729 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11730 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11731 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11732 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11733 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11736 /* Emit lea manually to avoid clobbering of flags. */
11737 emit_insn (gen_rtx_SET (SImode, reg2,
11738 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11740 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11741 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11742 emit_insn (gen_rtx_SET (VOIDmode, out,
11743 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11750 rtx end_2_label = gen_label_rtx ();
11751 /* Is zero in the first two bytes? */
11753 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11754 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11755 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11756 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11757 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11759 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11760 JUMP_LABEL (tmp) = end_2_label;
11762 /* Not in the first two. Move two bytes forward. */
11763 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11765 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11767 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11769 emit_label (end_2_label);
11773 /* Avoid branch in fixing the byte. */
11774 tmpreg = gen_lowpart (QImode, tmpreg);
11775 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11776 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11778 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11780 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11782 emit_label (end_0_label);
11786 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11787 rtx callarg2 ATTRIBUTE_UNUSED,
11788 rtx pop, int sibcall)
11790 rtx use = NULL, call;
11792 if (pop == const0_rtx)
11794 if (TARGET_64BIT && pop)
11798 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11799 fnaddr = machopic_indirect_call_target (fnaddr);
11801 /* Static functions and indirect calls don't need the pic register. */
11802 if (! TARGET_64BIT && flag_pic
11803 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11804 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11805 use_reg (&use, pic_offset_table_rtx);
11807 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11809 rtx al = gen_rtx_REG (QImode, 0);
11810 emit_move_insn (al, callarg2);
11811 use_reg (&use, al);
11813 #endif /* TARGET_MACHO */
11815 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11817 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11818 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11820 if (sibcall && TARGET_64BIT
11821 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11824 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11825 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11826 emit_move_insn (fnaddr, addr);
11827 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11830 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11832 call = gen_rtx_SET (VOIDmode, retval, call);
11835 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11836 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11837 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11840 call = emit_call_insn (call);
11842 CALL_INSN_FUNCTION_USAGE (call) = use;
11846 /* Clear stack slot assignments remembered from previous functions.
11847 This is called from INIT_EXPANDERS once before RTL is emitted for each
11850 static struct machine_function *
11851 ix86_init_machine_status (void)
11853 struct machine_function *f;
11855 f = ggc_alloc_cleared (sizeof (struct machine_function));
11856 f->use_fast_prologue_epilogue_nregs = -1;
11861 /* Return a MEM corresponding to a stack slot with mode MODE.
11862 Allocate a new slot if necessary.
11864 The RTL for a function can have several slots available: N is
11865 which slot to use. */
11868 assign_386_stack_local (enum machine_mode mode, int n)
11870 struct stack_local_entry *s;
11872 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11875 for (s = ix86_stack_locals; s; s = s->next)
11876 if (s->mode == mode && s->n == n)
11879 s = (struct stack_local_entry *)
11880 ggc_alloc (sizeof (struct stack_local_entry));
11883 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11885 s->next = ix86_stack_locals;
11886 ix86_stack_locals = s;
11890 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11892 static GTY(()) rtx ix86_tls_symbol;
11894 ix86_tls_get_addr (void)
11897 if (!ix86_tls_symbol)
11899 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11900 (TARGET_GNU_TLS && !TARGET_64BIT)
11901 ? "___tls_get_addr"
11902 : "__tls_get_addr");
11905 return ix86_tls_symbol;
11908 /* Calculate the length of the memory address in the instruction
11909 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11912 memory_address_length (rtx addr)
11914 struct ix86_address parts;
11915 rtx base, index, disp;
11918 if (GET_CODE (addr) == PRE_DEC
11919 || GET_CODE (addr) == POST_INC
11920 || GET_CODE (addr) == PRE_MODIFY
11921 || GET_CODE (addr) == POST_MODIFY)
11924 if (! ix86_decompose_address (addr, &parts))
11928 index = parts.index;
11933 - esp as the base always wants an index,
11934 - ebp as the base always wants a displacement. */
11936 /* Register Indirect. */
11937 if (base && !index && !disp)
11939 /* esp (for its index) and ebp (for its displacement) need
11940 the two-byte modrm form. */
11941 if (addr == stack_pointer_rtx
11942 || addr == arg_pointer_rtx
11943 || addr == frame_pointer_rtx
11944 || addr == hard_frame_pointer_rtx)
11948 /* Direct Addressing. */
11949 else if (disp && !base && !index)
11954 /* Find the length of the displacement constant. */
11957 if (GET_CODE (disp) == CONST_INT
11958 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11964 /* ebp always wants a displacement. */
11965 else if (base == hard_frame_pointer_rtx)
11968 /* An index requires the two-byte modrm form.... */
11970 /* ...like esp, which always wants an index. */
11971 || base == stack_pointer_rtx
11972 || base == arg_pointer_rtx
11973 || base == frame_pointer_rtx)
11980 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11981 is set, expect that insn have 8bit immediate alternative. */
11983 ix86_attr_length_immediate_default (rtx insn, int shortform)
11987 extract_insn_cached (insn);
11988 for (i = recog_data.n_operands - 1; i >= 0; --i)
11989 if (CONSTANT_P (recog_data.operand[i]))
11994 && GET_CODE (recog_data.operand[i]) == CONST_INT
11995 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11999 switch (get_attr_mode (insn))
12010 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12015 fatal_insn ("unknown insn mode", insn);
12021 /* Compute default value for "length_address" attribute. */
12023 ix86_attr_length_address_default (rtx insn)
12027 if (get_attr_type (insn) == TYPE_LEA)
12029 rtx set = PATTERN (insn);
12030 if (GET_CODE (set) == SET)
12032 else if (GET_CODE (set) == PARALLEL
12033 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12034 set = XVECEXP (set, 0, 0);
12037 #ifdef ENABLE_CHECKING
12043 return memory_address_length (SET_SRC (set));
12046 extract_insn_cached (insn);
12047 for (i = recog_data.n_operands - 1; i >= 0; --i)
12048 if (GET_CODE (recog_data.operand[i]) == MEM)
12050 return memory_address_length (XEXP (recog_data.operand[i], 0));
12056 /* Return the maximum number of instructions a cpu can issue. */
12059 ix86_issue_rate (void)
12063 case PROCESSOR_PENTIUM:
12067 case PROCESSOR_PENTIUMPRO:
12068 case PROCESSOR_PENTIUM4:
12069 case PROCESSOR_ATHLON:
12078 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12079 by DEP_INSN and nothing set by DEP_INSN. */
12082 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12086 /* Simplify the test for uninteresting insns. */
12087 if (insn_type != TYPE_SETCC
12088 && insn_type != TYPE_ICMOV
12089 && insn_type != TYPE_FCMOV
12090 && insn_type != TYPE_IBR)
12093 if ((set = single_set (dep_insn)) != 0)
12095 set = SET_DEST (set);
12098 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12099 && XVECLEN (PATTERN (dep_insn), 0) == 2
12100 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12101 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12103 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12104 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12109 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12112 /* This test is true if the dependent insn reads the flags but
12113 not any other potentially set register. */
12114 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12117 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12123 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12124 address with operands set by DEP_INSN. */
12127 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12131 if (insn_type == TYPE_LEA
12134 addr = PATTERN (insn);
12135 if (GET_CODE (addr) == SET)
12137 else if (GET_CODE (addr) == PARALLEL
12138 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12139 addr = XVECEXP (addr, 0, 0);
12142 addr = SET_SRC (addr);
12147 extract_insn_cached (insn);
12148 for (i = recog_data.n_operands - 1; i >= 0; --i)
12149 if (GET_CODE (recog_data.operand[i]) == MEM)
12151 addr = XEXP (recog_data.operand[i], 0);
12158 return modified_in_p (addr, dep_insn);
12162 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12164 enum attr_type insn_type, dep_insn_type;
12165 enum attr_memory memory, dep_memory;
12167 int dep_insn_code_number;
12169 /* Anti and output dependencies have zero cost on all CPUs. */
12170 if (REG_NOTE_KIND (link) != 0)
12173 dep_insn_code_number = recog_memoized (dep_insn);
12175 /* If we can't recognize the insns, we can't really do anything. */
12176 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12179 insn_type = get_attr_type (insn);
12180 dep_insn_type = get_attr_type (dep_insn);
12184 case PROCESSOR_PENTIUM:
12185 /* Address Generation Interlock adds a cycle of latency. */
12186 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12189 /* ??? Compares pair with jump/setcc. */
12190 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12193 /* Floating point stores require value to be ready one cycle earlier. */
12194 if (insn_type == TYPE_FMOV
12195 && get_attr_memory (insn) == MEMORY_STORE
12196 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12200 case PROCESSOR_PENTIUMPRO:
12201 memory = get_attr_memory (insn);
12202 dep_memory = get_attr_memory (dep_insn);
12204 /* Since we can't represent delayed latencies of load+operation,
12205 increase the cost here for non-imov insns. */
12206 if (dep_insn_type != TYPE_IMOV
12207 && dep_insn_type != TYPE_FMOV
12208 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12211 /* INT->FP conversion is expensive. */
12212 if (get_attr_fp_int_src (dep_insn))
12215 /* There is one cycle extra latency between an FP op and a store. */
12216 if (insn_type == TYPE_FMOV
12217 && (set = single_set (dep_insn)) != NULL_RTX
12218 && (set2 = single_set (insn)) != NULL_RTX
12219 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12220 && GET_CODE (SET_DEST (set2)) == MEM)
12223 /* Show ability of reorder buffer to hide latency of load by executing
12224 in parallel with previous instruction in case
12225 previous instruction is not needed to compute the address. */
12226 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12227 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12229 /* Claim moves to take one cycle, as core can issue one load
12230 at time and the next load can start cycle later. */
12231 if (dep_insn_type == TYPE_IMOV
12232 || dep_insn_type == TYPE_FMOV)
12240 memory = get_attr_memory (insn);
12241 dep_memory = get_attr_memory (dep_insn);
12242 /* The esp dependency is resolved before the instruction is really
12244 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12245 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12248 /* Since we can't represent delayed latencies of load+operation,
12249 increase the cost here for non-imov insns. */
12250 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12251 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12253 /* INT->FP conversion is expensive. */
12254 if (get_attr_fp_int_src (dep_insn))
12257 /* Show ability of reorder buffer to hide latency of load by executing
12258 in parallel with previous instruction in case
12259 previous instruction is not needed to compute the address. */
12260 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12261 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12263 /* Claim moves to take one cycle, as core can issue one load
12264 at time and the next load can start cycle later. */
12265 if (dep_insn_type == TYPE_IMOV
12266 || dep_insn_type == TYPE_FMOV)
12275 case PROCESSOR_ATHLON:
12277 memory = get_attr_memory (insn);
12278 dep_memory = get_attr_memory (dep_insn);
12280 /* Show ability of reorder buffer to hide latency of load by executing
12281 in parallel with previous instruction in case
12282 previous instruction is not needed to compute the address. */
12283 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12284 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12286 enum attr_unit unit = get_attr_unit (insn);
12289 /* Because of the difference between the length of integer and
12290 floating unit pipeline preparation stages, the memory operands
12291 for floating point are cheaper.
12293 ??? For Athlon it the difference is most probably 2. */
12294 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12297 loadcost = TARGET_ATHLON ? 2 : 0;
12299 if (cost >= loadcost)
12314 struct ppro_sched_data
12317 int issued_this_cycle;
12321 static enum attr_ppro_uops
12322 ix86_safe_ppro_uops (rtx insn)
12324 if (recog_memoized (insn) >= 0)
12325 return get_attr_ppro_uops (insn);
12327 return PPRO_UOPS_MANY;
12331 ix86_dump_ppro_packet (FILE *dump)
12333 if (ix86_sched_data.ppro.decode[0])
12335 fprintf (dump, "PPRO packet: %d",
12336 INSN_UID (ix86_sched_data.ppro.decode[0]));
12337 if (ix86_sched_data.ppro.decode[1])
12338 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12339 if (ix86_sched_data.ppro.decode[2])
12340 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12341 fputc ('\n', dump);
12345 /* We're beginning a new block. Initialize data structures as necessary. */
12348 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12349 int sched_verbose ATTRIBUTE_UNUSED,
12350 int veclen ATTRIBUTE_UNUSED)
12352 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12355 /* Shift INSN to SLOT, and shift everything else down. */
12358 ix86_reorder_insn (rtx *insnp, rtx *slot)
12364 insnp[0] = insnp[1];
12365 while (++insnp != slot);
12371 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12374 enum attr_ppro_uops cur_uops;
12375 int issued_this_cycle;
12379 /* At this point .ppro.decode contains the state of the three
12380 decoders from last "cycle". That is, those insns that were
12381 actually independent. But here we're scheduling for the
12382 decoder, and we may find things that are decodable in the
12385 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12386 issued_this_cycle = 0;
12389 cur_uops = ix86_safe_ppro_uops (*insnp);
12391 /* If the decoders are empty, and we've a complex insn at the
12392 head of the priority queue, let it issue without complaint. */
12393 if (decode[0] == NULL)
12395 if (cur_uops == PPRO_UOPS_MANY)
12397 decode[0] = *insnp;
12401 /* Otherwise, search for a 2-4 uop unsn to issue. */
12402 while (cur_uops != PPRO_UOPS_FEW)
12404 if (insnp == ready)
12406 cur_uops = ix86_safe_ppro_uops (*--insnp);
12409 /* If so, move it to the head of the line. */
12410 if (cur_uops == PPRO_UOPS_FEW)
12411 ix86_reorder_insn (insnp, e_ready);
12413 /* Issue the head of the queue. */
12414 issued_this_cycle = 1;
12415 decode[0] = *e_ready--;
12418 /* Look for simple insns to fill in the other two slots. */
12419 for (i = 1; i < 3; ++i)
12420 if (decode[i] == NULL)
12422 if (ready > e_ready)
12426 cur_uops = ix86_safe_ppro_uops (*insnp);
12427 while (cur_uops != PPRO_UOPS_ONE)
12429 if (insnp == ready)
12431 cur_uops = ix86_safe_ppro_uops (*--insnp);
12434 /* Found one. Move it to the head of the queue and issue it. */
12435 if (cur_uops == PPRO_UOPS_ONE)
12437 ix86_reorder_insn (insnp, e_ready);
12438 decode[i] = *e_ready--;
12439 issued_this_cycle++;
12443 /* ??? Didn't find one. Ideally, here we would do a lazy split
12444 of 2-uop insns, issue one and queue the other. */
12448 if (issued_this_cycle == 0)
12449 issued_this_cycle = 1;
12450 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12453 /* We are about to being issuing insns for this clock cycle.
12454 Override the default sort algorithm to better slot instructions. */
12456 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12457 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12458 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12460 int n_ready = *n_readyp;
12461 rtx *e_ready = ready + n_ready - 1;
12463 /* Make sure to go ahead and initialize key items in
12464 ix86_sched_data if we are not going to bother trying to
12465 reorder the ready queue. */
12468 ix86_sched_data.ppro.issued_this_cycle = 1;
12477 case PROCESSOR_PENTIUMPRO:
12478 ix86_sched_reorder_ppro (ready, e_ready);
12483 return ix86_issue_rate ();
12486 /* We are about to issue INSN. Return the number of insns left on the
12487 ready queue that can be issued this cycle. */
12490 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12491 int can_issue_more)
12497 return can_issue_more - 1;
12499 case PROCESSOR_PENTIUMPRO:
12501 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12503 if (uops == PPRO_UOPS_MANY)
12506 ix86_dump_ppro_packet (dump);
12507 ix86_sched_data.ppro.decode[0] = insn;
12508 ix86_sched_data.ppro.decode[1] = NULL;
12509 ix86_sched_data.ppro.decode[2] = NULL;
12511 ix86_dump_ppro_packet (dump);
12512 ix86_sched_data.ppro.decode[0] = NULL;
12514 else if (uops == PPRO_UOPS_FEW)
12517 ix86_dump_ppro_packet (dump);
12518 ix86_sched_data.ppro.decode[0] = insn;
12519 ix86_sched_data.ppro.decode[1] = NULL;
12520 ix86_sched_data.ppro.decode[2] = NULL;
12524 for (i = 0; i < 3; ++i)
12525 if (ix86_sched_data.ppro.decode[i] == NULL)
12527 ix86_sched_data.ppro.decode[i] = insn;
12535 ix86_dump_ppro_packet (dump);
12536 ix86_sched_data.ppro.decode[0] = NULL;
12537 ix86_sched_data.ppro.decode[1] = NULL;
12538 ix86_sched_data.ppro.decode[2] = NULL;
12542 return --ix86_sched_data.ppro.issued_this_cycle;
12547 ia32_use_dfa_pipeline_interface (void)
12549 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12554 /* How many alternative schedules to try. This should be as wide as the
12555 scheduling freedom in the DFA, but no wider. Making this value too
12556 large results extra work for the scheduler. */
12559 ia32_multipass_dfa_lookahead (void)
12561 if (ix86_tune == PROCESSOR_PENTIUM)
12568 /* Compute the alignment given to a constant that is being placed in memory.
12569 EXP is the constant and ALIGN is the alignment that the object would
12571 The value of this function is used instead of that alignment to align
12575 ix86_constant_alignment (tree exp, int align)
12577 if (TREE_CODE (exp) == REAL_CST)
12579 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12581 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12584 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12585 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12586 return BITS_PER_WORD;
12591 /* Compute the alignment for a static variable.
12592 TYPE is the data type, and ALIGN is the alignment that
12593 the object would ordinarily have. The value of this function is used
12594 instead of that alignment to align the object. */
12597 ix86_data_alignment (tree type, int align)
12599 if (AGGREGATE_TYPE_P (type)
12600 && TYPE_SIZE (type)
12601 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12602 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12603 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12606 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12607 to 16byte boundary. */
12610 if (AGGREGATE_TYPE_P (type)
12611 && TYPE_SIZE (type)
12612 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12613 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12614 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12618 if (TREE_CODE (type) == ARRAY_TYPE)
12620 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12622 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12625 else if (TREE_CODE (type) == COMPLEX_TYPE)
12628 if (TYPE_MODE (type) == DCmode && align < 64)
12630 if (TYPE_MODE (type) == XCmode && align < 128)
12633 else if ((TREE_CODE (type) == RECORD_TYPE
12634 || TREE_CODE (type) == UNION_TYPE
12635 || TREE_CODE (type) == QUAL_UNION_TYPE)
12636 && TYPE_FIELDS (type))
12638 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12640 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12643 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12644 || TREE_CODE (type) == INTEGER_TYPE)
12646 if (TYPE_MODE (type) == DFmode && align < 64)
12648 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12655 /* Compute the alignment for a local variable.
12656 TYPE is the data type, and ALIGN is the alignment that
12657 the object would ordinarily have. The value of this macro is used
12658 instead of that alignment to align the object. */
12661 ix86_local_alignment (tree type, int align)
12663 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12664 to 16byte boundary. */
12667 if (AGGREGATE_TYPE_P (type)
12668 && TYPE_SIZE (type)
12669 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12670 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12671 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12674 if (TREE_CODE (type) == ARRAY_TYPE)
12676 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12678 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12681 else if (TREE_CODE (type) == COMPLEX_TYPE)
12683 if (TYPE_MODE (type) == DCmode && align < 64)
12685 if (TYPE_MODE (type) == XCmode && align < 128)
12688 else if ((TREE_CODE (type) == RECORD_TYPE
12689 || TREE_CODE (type) == UNION_TYPE
12690 || TREE_CODE (type) == QUAL_UNION_TYPE)
12691 && TYPE_FIELDS (type))
12693 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12695 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12698 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12699 || TREE_CODE (type) == INTEGER_TYPE)
12702 if (TYPE_MODE (type) == DFmode && align < 64)
12704 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12710 /* Emit RTL insns to initialize the variable parts of a trampoline.
12711 FNADDR is an RTX for the address of the function's pure code.
12712 CXT is an RTX for the static chain value for the function. */
12714 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12718 /* Compute offset from the end of the jmp to the target function. */
12719 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12720 plus_constant (tramp, 10),
12721 NULL_RTX, 1, OPTAB_DIRECT);
12722 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12723 gen_int_mode (0xb9, QImode));
12724 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12725 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12726 gen_int_mode (0xe9, QImode));
12727 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12732 /* Try to load address using shorter movl instead of movabs.
12733 We may want to support movq for kernel mode, but kernel does not use
12734 trampolines at the moment. */
12735 if (x86_64_zero_extended_value (fnaddr))
12737 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12738 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12739 gen_int_mode (0xbb41, HImode));
12740 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12741 gen_lowpart (SImode, fnaddr));
12746 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12747 gen_int_mode (0xbb49, HImode));
12748 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12752 /* Load static chain using movabs to r10. */
12753 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12754 gen_int_mode (0xba49, HImode));
12755 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12758 /* Jump to the r11 */
12759 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12760 gen_int_mode (0xff49, HImode));
12761 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12762 gen_int_mode (0xe3, QImode));
12764 if (offset > TRAMPOLINE_SIZE)
12768 #ifdef TRANSFER_FROM_TRAMPOLINE
12769 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12770 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12774 #define def_builtin(MASK, NAME, TYPE, CODE) \
12776 if ((MASK) & target_flags \
12777 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12778 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12779 NULL, NULL_TREE); \
12782 struct builtin_description
12784 const unsigned int mask;
12785 const enum insn_code icode;
12786 const char *const name;
12787 const enum ix86_builtins code;
12788 const enum rtx_code comparison;
12789 const unsigned int flag;
12792 static const struct builtin_description bdesc_comi[] =
12794 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12795 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12796 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12797 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12798 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12799 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12800 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12801 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12802 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12803 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12804 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12805 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12806 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12807 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12808 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12809 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12810 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12811 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12812 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12813 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12814 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12815 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12816 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12817 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12820 static const struct builtin_description bdesc_2arg[] =
12823 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12824 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12825 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12826 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12827 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12828 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12829 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12830 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12832 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12833 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12834 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12835 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12836 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12837 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12838 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12839 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12840 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12841 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12842 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12843 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12844 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12845 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12846 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12847 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12848 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12849 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12850 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12851 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12853 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12854 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12855 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12856 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12858 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12859 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12860 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12861 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12863 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12864 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12865 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12866 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12867 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12870 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12871 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12872 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12873 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12874 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12875 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12876 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12877 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12879 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12880 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12881 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12882 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12883 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12884 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12885 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12886 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12888 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12889 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12890 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12892 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12893 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12894 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12895 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12897 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12898 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12900 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12901 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12902 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12903 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12904 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12905 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12907 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12908 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12909 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12910 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12912 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12913 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12914 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12915 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12916 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12917 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12920 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12921 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12922 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12924 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12925 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12926 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12928 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12929 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12930 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12931 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12932 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12933 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12935 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12936 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12937 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12938 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12939 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12940 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12942 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12943 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12944 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12945 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12947 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12948 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12958 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12961 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12962 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12963 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12964 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12965 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12966 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12967 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12968 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12969 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12970 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12971 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12972 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12973 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12974 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12975 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12976 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12977 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12978 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12979 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12981 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12993 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12996 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12997 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12998 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13002 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13005 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13006 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13007 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13008 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13009 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13010 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13011 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13012 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13028 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13040 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13041 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13046 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13060 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13062 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13065 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13067 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13069 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13070 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13071 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13072 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13074 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13076 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13077 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13078 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13079 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13082 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13083 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13084 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13085 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13086 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13087 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13090 static const struct builtin_description bdesc_1arg[] =
13092 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13093 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13095 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13096 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13097 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13099 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13100 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13101 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13102 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13103 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13104 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13106 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13107 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13109 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13111 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13113 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13114 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13116 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13117 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13118 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13119 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13120 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13122 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13124 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13125 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13126 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13127 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13129 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13130 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13131 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13133 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13136 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13137 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13138 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13142 ix86_init_builtins (void)
13145 ix86_init_mmx_sse_builtins ();
13148 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13149 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13152 ix86_init_mmx_sse_builtins (void)
13154 const struct builtin_description * d;
13157 tree pchar_type_node = build_pointer_type (char_type_node);
13158 tree pcchar_type_node = build_pointer_type (
13159 build_type_variant (char_type_node, 1, 0));
13160 tree pfloat_type_node = build_pointer_type (float_type_node);
13161 tree pcfloat_type_node = build_pointer_type (
13162 build_type_variant (float_type_node, 1, 0));
13163 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13164 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13165 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13168 tree int_ftype_v4sf_v4sf
13169 = build_function_type_list (integer_type_node,
13170 V4SF_type_node, V4SF_type_node, NULL_TREE);
13171 tree v4si_ftype_v4sf_v4sf
13172 = build_function_type_list (V4SI_type_node,
13173 V4SF_type_node, V4SF_type_node, NULL_TREE);
13174 /* MMX/SSE/integer conversions. */
13175 tree int_ftype_v4sf
13176 = build_function_type_list (integer_type_node,
13177 V4SF_type_node, NULL_TREE);
13178 tree int64_ftype_v4sf
13179 = build_function_type_list (long_long_integer_type_node,
13180 V4SF_type_node, NULL_TREE);
13181 tree int_ftype_v8qi
13182 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13183 tree v4sf_ftype_v4sf_int
13184 = build_function_type_list (V4SF_type_node,
13185 V4SF_type_node, integer_type_node, NULL_TREE);
13186 tree v4sf_ftype_v4sf_int64
13187 = build_function_type_list (V4SF_type_node,
13188 V4SF_type_node, long_long_integer_type_node,
13190 tree v4sf_ftype_v4sf_v2si
13191 = build_function_type_list (V4SF_type_node,
13192 V4SF_type_node, V2SI_type_node, NULL_TREE);
13193 tree int_ftype_v4hi_int
13194 = build_function_type_list (integer_type_node,
13195 V4HI_type_node, integer_type_node, NULL_TREE);
13196 tree v4hi_ftype_v4hi_int_int
13197 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13198 integer_type_node, integer_type_node,
13200 /* Miscellaneous. */
13201 tree v8qi_ftype_v4hi_v4hi
13202 = build_function_type_list (V8QI_type_node,
13203 V4HI_type_node, V4HI_type_node, NULL_TREE);
13204 tree v4hi_ftype_v2si_v2si
13205 = build_function_type_list (V4HI_type_node,
13206 V2SI_type_node, V2SI_type_node, NULL_TREE);
13207 tree v4sf_ftype_v4sf_v4sf_int
13208 = build_function_type_list (V4SF_type_node,
13209 V4SF_type_node, V4SF_type_node,
13210 integer_type_node, NULL_TREE);
13211 tree v2si_ftype_v4hi_v4hi
13212 = build_function_type_list (V2SI_type_node,
13213 V4HI_type_node, V4HI_type_node, NULL_TREE);
13214 tree v4hi_ftype_v4hi_int
13215 = build_function_type_list (V4HI_type_node,
13216 V4HI_type_node, integer_type_node, NULL_TREE);
13217 tree v4hi_ftype_v4hi_di
13218 = build_function_type_list (V4HI_type_node,
13219 V4HI_type_node, long_long_unsigned_type_node,
13221 tree v2si_ftype_v2si_di
13222 = build_function_type_list (V2SI_type_node,
13223 V2SI_type_node, long_long_unsigned_type_node,
13225 tree void_ftype_void
13226 = build_function_type (void_type_node, void_list_node);
13227 tree void_ftype_unsigned
13228 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13229 tree void_ftype_unsigned_unsigned
13230 = build_function_type_list (void_type_node, unsigned_type_node,
13231 unsigned_type_node, NULL_TREE);
13232 tree void_ftype_pcvoid_unsigned_unsigned
13233 = build_function_type_list (void_type_node, const_ptr_type_node,
13234 unsigned_type_node, unsigned_type_node,
13236 tree unsigned_ftype_void
13237 = build_function_type (unsigned_type_node, void_list_node);
13239 = build_function_type (long_long_unsigned_type_node, void_list_node);
13240 tree v4sf_ftype_void
13241 = build_function_type (V4SF_type_node, void_list_node);
13242 tree v2si_ftype_v4sf
13243 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13244 /* Loads/stores. */
13245 tree void_ftype_v8qi_v8qi_pchar
13246 = build_function_type_list (void_type_node,
13247 V8QI_type_node, V8QI_type_node,
13248 pchar_type_node, NULL_TREE);
13249 tree v4sf_ftype_pcfloat
13250 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13251 /* @@@ the type is bogus */
13252 tree v4sf_ftype_v4sf_pv2si
13253 = build_function_type_list (V4SF_type_node,
13254 V4SF_type_node, pv2si_type_node, NULL_TREE);
13255 tree void_ftype_pv2si_v4sf
13256 = build_function_type_list (void_type_node,
13257 pv2si_type_node, V4SF_type_node, NULL_TREE);
13258 tree void_ftype_pfloat_v4sf
13259 = build_function_type_list (void_type_node,
13260 pfloat_type_node, V4SF_type_node, NULL_TREE);
13261 tree void_ftype_pdi_di
13262 = build_function_type_list (void_type_node,
13263 pdi_type_node, long_long_unsigned_type_node,
13265 tree void_ftype_pv2di_v2di
13266 = build_function_type_list (void_type_node,
13267 pv2di_type_node, V2DI_type_node, NULL_TREE);
13268 /* Normal vector unops. */
13269 tree v4sf_ftype_v4sf
13270 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13272 /* Normal vector binops. */
13273 tree v4sf_ftype_v4sf_v4sf
13274 = build_function_type_list (V4SF_type_node,
13275 V4SF_type_node, V4SF_type_node, NULL_TREE);
13276 tree v8qi_ftype_v8qi_v8qi
13277 = build_function_type_list (V8QI_type_node,
13278 V8QI_type_node, V8QI_type_node, NULL_TREE);
13279 tree v4hi_ftype_v4hi_v4hi
13280 = build_function_type_list (V4HI_type_node,
13281 V4HI_type_node, V4HI_type_node, NULL_TREE);
13282 tree v2si_ftype_v2si_v2si
13283 = build_function_type_list (V2SI_type_node,
13284 V2SI_type_node, V2SI_type_node, NULL_TREE);
13285 tree di_ftype_di_di
13286 = build_function_type_list (long_long_unsigned_type_node,
13287 long_long_unsigned_type_node,
13288 long_long_unsigned_type_node, NULL_TREE);
13290 tree v2si_ftype_v2sf
13291 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13292 tree v2sf_ftype_v2si
13293 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13294 tree v2si_ftype_v2si
13295 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13296 tree v2sf_ftype_v2sf
13297 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13298 tree v2sf_ftype_v2sf_v2sf
13299 = build_function_type_list (V2SF_type_node,
13300 V2SF_type_node, V2SF_type_node, NULL_TREE);
13301 tree v2si_ftype_v2sf_v2sf
13302 = build_function_type_list (V2SI_type_node,
13303 V2SF_type_node, V2SF_type_node, NULL_TREE);
13304 tree pint_type_node = build_pointer_type (integer_type_node);
13305 tree pcint_type_node = build_pointer_type (
13306 build_type_variant (integer_type_node, 1, 0));
13307 tree pdouble_type_node = build_pointer_type (double_type_node);
13308 tree pcdouble_type_node = build_pointer_type (
13309 build_type_variant (double_type_node, 1, 0));
13310 tree int_ftype_v2df_v2df
13311 = build_function_type_list (integer_type_node,
13312 V2DF_type_node, V2DF_type_node, NULL_TREE);
13315 = build_function_type (intTI_type_node, void_list_node);
13316 tree v2di_ftype_void
13317 = build_function_type (V2DI_type_node, void_list_node);
13318 tree ti_ftype_ti_ti
13319 = build_function_type_list (intTI_type_node,
13320 intTI_type_node, intTI_type_node, NULL_TREE);
13321 tree void_ftype_pcvoid
13322 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13324 = build_function_type_list (V2DI_type_node,
13325 long_long_unsigned_type_node, NULL_TREE);
13327 = build_function_type_list (long_long_unsigned_type_node,
13328 V2DI_type_node, NULL_TREE);
13329 tree v4sf_ftype_v4si
13330 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13331 tree v4si_ftype_v4sf
13332 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13333 tree v2df_ftype_v4si
13334 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13335 tree v4si_ftype_v2df
13336 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13337 tree v2si_ftype_v2df
13338 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13339 tree v4sf_ftype_v2df
13340 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13341 tree v2df_ftype_v2si
13342 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13343 tree v2df_ftype_v4sf
13344 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13345 tree int_ftype_v2df
13346 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13347 tree int64_ftype_v2df
13348 = build_function_type_list (long_long_integer_type_node,
13349 V2DF_type_node, NULL_TREE);
13350 tree v2df_ftype_v2df_int
13351 = build_function_type_list (V2DF_type_node,
13352 V2DF_type_node, integer_type_node, NULL_TREE);
13353 tree v2df_ftype_v2df_int64
13354 = build_function_type_list (V2DF_type_node,
13355 V2DF_type_node, long_long_integer_type_node,
13357 tree v4sf_ftype_v4sf_v2df
13358 = build_function_type_list (V4SF_type_node,
13359 V4SF_type_node, V2DF_type_node, NULL_TREE);
13360 tree v2df_ftype_v2df_v4sf
13361 = build_function_type_list (V2DF_type_node,
13362 V2DF_type_node, V4SF_type_node, NULL_TREE);
13363 tree v2df_ftype_v2df_v2df_int
13364 = build_function_type_list (V2DF_type_node,
13365 V2DF_type_node, V2DF_type_node,
13368 tree v2df_ftype_v2df_pv2si
13369 = build_function_type_list (V2DF_type_node,
13370 V2DF_type_node, pv2si_type_node, NULL_TREE);
13371 tree void_ftype_pv2si_v2df
13372 = build_function_type_list (void_type_node,
13373 pv2si_type_node, V2DF_type_node, NULL_TREE);
13374 tree void_ftype_pdouble_v2df
13375 = build_function_type_list (void_type_node,
13376 pdouble_type_node, V2DF_type_node, NULL_TREE);
13377 tree void_ftype_pint_int
13378 = build_function_type_list (void_type_node,
13379 pint_type_node, integer_type_node, NULL_TREE);
13380 tree void_ftype_v16qi_v16qi_pchar
13381 = build_function_type_list (void_type_node,
13382 V16QI_type_node, V16QI_type_node,
13383 pchar_type_node, NULL_TREE);
13384 tree v2df_ftype_pcdouble
13385 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13386 tree v2df_ftype_v2df_v2df
13387 = build_function_type_list (V2DF_type_node,
13388 V2DF_type_node, V2DF_type_node, NULL_TREE);
13389 tree v16qi_ftype_v16qi_v16qi
13390 = build_function_type_list (V16QI_type_node,
13391 V16QI_type_node, V16QI_type_node, NULL_TREE);
13392 tree v8hi_ftype_v8hi_v8hi
13393 = build_function_type_list (V8HI_type_node,
13394 V8HI_type_node, V8HI_type_node, NULL_TREE);
13395 tree v4si_ftype_v4si_v4si
13396 = build_function_type_list (V4SI_type_node,
13397 V4SI_type_node, V4SI_type_node, NULL_TREE);
13398 tree v2di_ftype_v2di_v2di
13399 = build_function_type_list (V2DI_type_node,
13400 V2DI_type_node, V2DI_type_node, NULL_TREE);
13401 tree v2di_ftype_v2df_v2df
13402 = build_function_type_list (V2DI_type_node,
13403 V2DF_type_node, V2DF_type_node, NULL_TREE);
13404 tree v2df_ftype_v2df
13405 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13406 tree v2df_ftype_double
13407 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13408 tree v2df_ftype_double_double
13409 = build_function_type_list (V2DF_type_node,
13410 double_type_node, double_type_node, NULL_TREE);
13411 tree int_ftype_v8hi_int
13412 = build_function_type_list (integer_type_node,
13413 V8HI_type_node, integer_type_node, NULL_TREE);
13414 tree v8hi_ftype_v8hi_int_int
13415 = build_function_type_list (V8HI_type_node,
13416 V8HI_type_node, integer_type_node,
13417 integer_type_node, NULL_TREE);
13418 tree v2di_ftype_v2di_int
13419 = build_function_type_list (V2DI_type_node,
13420 V2DI_type_node, integer_type_node, NULL_TREE);
13421 tree v4si_ftype_v4si_int
13422 = build_function_type_list (V4SI_type_node,
13423 V4SI_type_node, integer_type_node, NULL_TREE);
13424 tree v8hi_ftype_v8hi_int
13425 = build_function_type_list (V8HI_type_node,
13426 V8HI_type_node, integer_type_node, NULL_TREE);
13427 tree v8hi_ftype_v8hi_v2di
13428 = build_function_type_list (V8HI_type_node,
13429 V8HI_type_node, V2DI_type_node, NULL_TREE);
13430 tree v4si_ftype_v4si_v2di
13431 = build_function_type_list (V4SI_type_node,
13432 V4SI_type_node, V2DI_type_node, NULL_TREE);
13433 tree v4si_ftype_v8hi_v8hi
13434 = build_function_type_list (V4SI_type_node,
13435 V8HI_type_node, V8HI_type_node, NULL_TREE);
13436 tree di_ftype_v8qi_v8qi
13437 = build_function_type_list (long_long_unsigned_type_node,
13438 V8QI_type_node, V8QI_type_node, NULL_TREE);
13439 tree v2di_ftype_v16qi_v16qi
13440 = build_function_type_list (V2DI_type_node,
13441 V16QI_type_node, V16QI_type_node, NULL_TREE);
13442 tree int_ftype_v16qi
13443 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13444 tree v16qi_ftype_pcchar
13445 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13446 tree void_ftype_pchar_v16qi
13447 = build_function_type_list (void_type_node,
13448 pchar_type_node, V16QI_type_node, NULL_TREE);
13449 tree v4si_ftype_pcint
13450 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13451 tree void_ftype_pcint_v4si
13452 = build_function_type_list (void_type_node,
13453 pcint_type_node, V4SI_type_node, NULL_TREE);
13454 tree v2di_ftype_v2di
13455 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13458 tree float128_type;
13460 /* The __float80 type. */
13461 if (TYPE_MODE (long_double_type_node) == XFmode)
13462 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13466 /* The __float80 type. */
13467 float80_type = make_node (REAL_TYPE);
13468 TYPE_PRECISION (float80_type) = 96;
13469 layout_type (float80_type);
13470 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13473 float128_type = make_node (REAL_TYPE);
13474 TYPE_PRECISION (float128_type) = 128;
13475 layout_type (float128_type);
13476 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13478 /* Add all builtins that are more or less simple operations on two
13480 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13482 /* Use one of the operands; the target can have a different mode for
13483 mask-generating compares. */
13484 enum machine_mode mode;
13489 mode = insn_data[d->icode].operand[1].mode;
13494 type = v16qi_ftype_v16qi_v16qi;
13497 type = v8hi_ftype_v8hi_v8hi;
13500 type = v4si_ftype_v4si_v4si;
13503 type = v2di_ftype_v2di_v2di;
13506 type = v2df_ftype_v2df_v2df;
13509 type = ti_ftype_ti_ti;
13512 type = v4sf_ftype_v4sf_v4sf;
13515 type = v8qi_ftype_v8qi_v8qi;
13518 type = v4hi_ftype_v4hi_v4hi;
13521 type = v2si_ftype_v2si_v2si;
13524 type = di_ftype_di_di;
13531 /* Override for comparisons. */
13532 if (d->icode == CODE_FOR_maskcmpv4sf3
13533 || d->icode == CODE_FOR_maskncmpv4sf3
13534 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13535 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13536 type = v4si_ftype_v4sf_v4sf;
13538 if (d->icode == CODE_FOR_maskcmpv2df3
13539 || d->icode == CODE_FOR_maskncmpv2df3
13540 || d->icode == CODE_FOR_vmmaskcmpv2df3
13541 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13542 type = v2di_ftype_v2df_v2df;
13544 def_builtin (d->mask, d->name, type, d->code);
13547 /* Add the remaining MMX insns with somewhat more complicated types. */
13548 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13549 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13550 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13551 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13552 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13554 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13555 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13556 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13558 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13559 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13561 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13562 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13564 /* comi/ucomi insns. */
13565 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13566 if (d->mask == MASK_SSE2)
13567 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13569 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13571 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13572 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13573 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13575 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13576 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13577 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13578 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13579 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13580 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13581 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13582 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13583 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13584 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13585 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13587 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13588 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13590 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13592 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13593 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13594 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13595 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13596 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13597 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13599 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13600 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13601 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13602 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13604 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13605 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13606 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13607 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13609 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13611 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13613 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13614 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13615 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13616 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13617 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13618 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13620 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13622 /* Original 3DNow! */
13623 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13624 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13625 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13626 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13627 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13628 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13629 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13630 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13631 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13632 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13633 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13634 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13635 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13636 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13637 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13638 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13639 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13640 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13641 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13642 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13644 /* 3DNow! extension as used in the Athlon CPU. */
13645 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13646 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13647 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13648 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13649 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13650 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13652 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13655 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13656 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13658 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13660 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13663 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13666 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13667 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13669 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13670 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13672 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13674 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13677 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13678 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13680 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13681 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13682 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13683 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13685 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13686 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13688 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13690 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13691 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13693 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13694 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13695 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13696 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13697 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13699 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13701 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13702 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13703 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13704 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13706 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13707 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13708 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13710 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13711 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13712 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13713 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13715 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13716 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13717 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13718 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13719 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13720 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13721 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13723 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13724 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13725 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13727 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13728 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13729 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13730 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13731 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13732 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13733 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13735 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13737 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13738 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13739 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13741 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13742 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13743 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13745 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13746 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13748 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13749 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13750 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13751 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13753 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13754 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13755 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13756 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13758 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13759 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13761 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13763 /* Prescott New Instructions. */
13764 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13765 void_ftype_pcvoid_unsigned_unsigned,
13766 IX86_BUILTIN_MONITOR);
13767 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13768 void_ftype_unsigned_unsigned,
13769 IX86_BUILTIN_MWAIT);
13770 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13772 IX86_BUILTIN_MOVSHDUP);
13773 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13775 IX86_BUILTIN_MOVSLDUP);
13776 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13777 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13778 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13779 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13780 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13781 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13784 /* Errors in the source file can cause expand_expr to return const0_rtx
13785 where we expect a vector. To avoid crashing, use one of the vector
13786 clear instructions. */
13788 safe_vector_operand (rtx x, enum machine_mode mode)
13790 if (x != const0_rtx)
13792 x = gen_reg_rtx (mode);
13794 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13795 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13796 : gen_rtx_SUBREG (DImode, x, 0)));
13798 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13799 : gen_rtx_SUBREG (V4SFmode, x, 0),
13800 CONST0_RTX (V4SFmode)));
13804 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13807 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13810 tree arg0 = TREE_VALUE (arglist);
13811 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13812 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13813 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13814 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13815 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13816 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13818 if (VECTOR_MODE_P (mode0))
13819 op0 = safe_vector_operand (op0, mode0);
13820 if (VECTOR_MODE_P (mode1))
13821 op1 = safe_vector_operand (op1, mode1);
13824 || GET_MODE (target) != tmode
13825 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13826 target = gen_reg_rtx (tmode);
13828 if (GET_MODE (op1) == SImode && mode1 == TImode)
13830 rtx x = gen_reg_rtx (V4SImode);
13831 emit_insn (gen_sse2_loadd (x, op1));
13832 op1 = gen_lowpart (TImode, x);
13835 /* In case the insn wants input operands in modes different from
13836 the result, abort. */
13837 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13838 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13841 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13842 op0 = copy_to_mode_reg (mode0, op0);
13843 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13844 op1 = copy_to_mode_reg (mode1, op1);
13846 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13847 yet one of the two must not be a memory. This is normally enforced
13848 by expanders, but we didn't bother to create one here. */
13849 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13850 op0 = copy_to_mode_reg (mode0, op0);
13852 pat = GEN_FCN (icode) (target, op0, op1);
13859 /* Subroutine of ix86_expand_builtin to take care of stores. */
13862 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13865 tree arg0 = TREE_VALUE (arglist);
13866 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13867 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13868 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13869 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13870 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13872 if (VECTOR_MODE_P (mode1))
13873 op1 = safe_vector_operand (op1, mode1);
13875 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13876 op1 = copy_to_mode_reg (mode1, op1);
13878 pat = GEN_FCN (icode) (op0, op1);
13884 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13887 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13888 rtx target, int do_load)
13891 tree arg0 = TREE_VALUE (arglist);
13892 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13893 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13894 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13897 || GET_MODE (target) != tmode
13898 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13899 target = gen_reg_rtx (tmode);
13901 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13904 if (VECTOR_MODE_P (mode0))
13905 op0 = safe_vector_operand (op0, mode0);
13907 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13908 op0 = copy_to_mode_reg (mode0, op0);
13911 pat = GEN_FCN (icode) (target, op0);
13918 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13919 sqrtss, rsqrtss, rcpss. */
13922 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13925 tree arg0 = TREE_VALUE (arglist);
13926 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13927 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13928 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13931 || GET_MODE (target) != tmode
13932 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13933 target = gen_reg_rtx (tmode);
13935 if (VECTOR_MODE_P (mode0))
13936 op0 = safe_vector_operand (op0, mode0);
13938 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13939 op0 = copy_to_mode_reg (mode0, op0);
13942 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13943 op1 = copy_to_mode_reg (mode0, op1);
13945 pat = GEN_FCN (icode) (target, op0, op1);
13952 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13955 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13959 tree arg0 = TREE_VALUE (arglist);
13960 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13961 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13962 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13964 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13965 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13966 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13967 enum rtx_code comparison = d->comparison;
13969 if (VECTOR_MODE_P (mode0))
13970 op0 = safe_vector_operand (op0, mode0);
13971 if (VECTOR_MODE_P (mode1))
13972 op1 = safe_vector_operand (op1, mode1);
13974 /* Swap operands if we have a comparison that isn't available in
13978 rtx tmp = gen_reg_rtx (mode1);
13979 emit_move_insn (tmp, op1);
13985 || GET_MODE (target) != tmode
13986 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13987 target = gen_reg_rtx (tmode);
13989 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13990 op0 = copy_to_mode_reg (mode0, op0);
13991 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13992 op1 = copy_to_mode_reg (mode1, op1);
13994 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13995 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14002 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
14005 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14009 tree arg0 = TREE_VALUE (arglist);
14010 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14011 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14012 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14014 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14015 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14016 enum rtx_code comparison = d->comparison;
14018 if (VECTOR_MODE_P (mode0))
14019 op0 = safe_vector_operand (op0, mode0);
14020 if (VECTOR_MODE_P (mode1))
14021 op1 = safe_vector_operand (op1, mode1);
14023 /* Swap operands if we have a comparison that isn't available in
14032 target = gen_reg_rtx (SImode);
14033 emit_move_insn (target, const0_rtx);
14034 target = gen_rtx_SUBREG (QImode, target, 0);
14036 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14037 op0 = copy_to_mode_reg (mode0, op0);
14038 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14039 op1 = copy_to_mode_reg (mode1, op1);
14041 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14042 pat = GEN_FCN (d->icode) (op0, op1);
14046 emit_insn (gen_rtx_SET (VOIDmode,
14047 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14048 gen_rtx_fmt_ee (comparison, QImode,
14052 return SUBREG_REG (target);
14055 /* Expand an expression EXP that calls a built-in function,
14056 with result going to TARGET if that's convenient
14057 (and in mode MODE if that's convenient).
14058 SUBTARGET may be used as the target for computing one of EXP's operands.
14059 IGNORE is nonzero if the value is to be ignored. */
14062 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14063 enum machine_mode mode ATTRIBUTE_UNUSED,
14064 int ignore ATTRIBUTE_UNUSED)
14066 const struct builtin_description *d;
14068 enum insn_code icode;
14069 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14070 tree arglist = TREE_OPERAND (exp, 1);
14071 tree arg0, arg1, arg2;
14072 rtx op0, op1, op2, pat;
14073 enum machine_mode tmode, mode0, mode1, mode2;
14074 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14078 case IX86_BUILTIN_EMMS:
14079 emit_insn (gen_emms ());
14082 case IX86_BUILTIN_SFENCE:
14083 emit_insn (gen_sfence ());
14086 case IX86_BUILTIN_PEXTRW:
14087 case IX86_BUILTIN_PEXTRW128:
14088 icode = (fcode == IX86_BUILTIN_PEXTRW
14089 ? CODE_FOR_mmx_pextrw
14090 : CODE_FOR_sse2_pextrw);
14091 arg0 = TREE_VALUE (arglist);
14092 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14093 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14094 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14095 tmode = insn_data[icode].operand[0].mode;
14096 mode0 = insn_data[icode].operand[1].mode;
14097 mode1 = insn_data[icode].operand[2].mode;
14099 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14100 op0 = copy_to_mode_reg (mode0, op0);
14101 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14103 error ("selector must be an integer constant in the range 0..%i",
14104 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14105 return gen_reg_rtx (tmode);
14108 || GET_MODE (target) != tmode
14109 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14110 target = gen_reg_rtx (tmode);
14111 pat = GEN_FCN (icode) (target, op0, op1);
14117 case IX86_BUILTIN_PINSRW:
14118 case IX86_BUILTIN_PINSRW128:
14119 icode = (fcode == IX86_BUILTIN_PINSRW
14120 ? CODE_FOR_mmx_pinsrw
14121 : CODE_FOR_sse2_pinsrw);
14122 arg0 = TREE_VALUE (arglist);
14123 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14124 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14125 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14126 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14127 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14128 tmode = insn_data[icode].operand[0].mode;
14129 mode0 = insn_data[icode].operand[1].mode;
14130 mode1 = insn_data[icode].operand[2].mode;
14131 mode2 = insn_data[icode].operand[3].mode;
14133 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14134 op0 = copy_to_mode_reg (mode0, op0);
14135 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14136 op1 = copy_to_mode_reg (mode1, op1);
14137 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14139 error ("selector must be an integer constant in the range 0..%i",
14140 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14144 || GET_MODE (target) != tmode
14145 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14146 target = gen_reg_rtx (tmode);
14147 pat = GEN_FCN (icode) (target, op0, op1, op2);
14153 case IX86_BUILTIN_MASKMOVQ:
14154 case IX86_BUILTIN_MASKMOVDQU:
14155 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14156 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14157 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14158 : CODE_FOR_sse2_maskmovdqu));
14159 /* Note the arg order is different from the operand order. */
14160 arg1 = TREE_VALUE (arglist);
14161 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14162 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14163 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14164 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14165 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14166 mode0 = insn_data[icode].operand[0].mode;
14167 mode1 = insn_data[icode].operand[1].mode;
14168 mode2 = insn_data[icode].operand[2].mode;
14170 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14171 op0 = copy_to_mode_reg (mode0, op0);
14172 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14173 op1 = copy_to_mode_reg (mode1, op1);
14174 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14175 op2 = copy_to_mode_reg (mode2, op2);
14176 pat = GEN_FCN (icode) (op0, op1, op2);
14182 case IX86_BUILTIN_SQRTSS:
14183 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14184 case IX86_BUILTIN_RSQRTSS:
14185 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14186 case IX86_BUILTIN_RCPSS:
14187 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14189 case IX86_BUILTIN_LOADAPS:
14190 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14192 case IX86_BUILTIN_LOADUPS:
14193 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14195 case IX86_BUILTIN_STOREAPS:
14196 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14198 case IX86_BUILTIN_STOREUPS:
14199 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14201 case IX86_BUILTIN_LOADSS:
14202 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14204 case IX86_BUILTIN_STORESS:
14205 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14207 case IX86_BUILTIN_LOADHPS:
14208 case IX86_BUILTIN_LOADLPS:
14209 case IX86_BUILTIN_LOADHPD:
14210 case IX86_BUILTIN_LOADLPD:
14211 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14212 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14213 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14214 : CODE_FOR_sse2_movsd);
14215 arg0 = TREE_VALUE (arglist);
14216 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14217 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14218 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14219 tmode = insn_data[icode].operand[0].mode;
14220 mode0 = insn_data[icode].operand[1].mode;
14221 mode1 = insn_data[icode].operand[2].mode;
14223 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14224 op0 = copy_to_mode_reg (mode0, op0);
14225 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14227 || GET_MODE (target) != tmode
14228 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14229 target = gen_reg_rtx (tmode);
14230 pat = GEN_FCN (icode) (target, op0, op1);
14236 case IX86_BUILTIN_STOREHPS:
14237 case IX86_BUILTIN_STORELPS:
14238 case IX86_BUILTIN_STOREHPD:
14239 case IX86_BUILTIN_STORELPD:
14240 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14241 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14242 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14243 : CODE_FOR_sse2_movsd);
14244 arg0 = TREE_VALUE (arglist);
14245 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14246 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14247 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14248 mode0 = insn_data[icode].operand[1].mode;
14249 mode1 = insn_data[icode].operand[2].mode;
14251 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14252 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14253 op1 = copy_to_mode_reg (mode1, op1);
14255 pat = GEN_FCN (icode) (op0, op0, op1);
14261 case IX86_BUILTIN_MOVNTPS:
14262 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14263 case IX86_BUILTIN_MOVNTQ:
14264 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14266 case IX86_BUILTIN_LDMXCSR:
14267 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14268 target = assign_386_stack_local (SImode, 0);
14269 emit_move_insn (target, op0);
14270 emit_insn (gen_ldmxcsr (target));
14273 case IX86_BUILTIN_STMXCSR:
14274 target = assign_386_stack_local (SImode, 0);
14275 emit_insn (gen_stmxcsr (target));
14276 return copy_to_mode_reg (SImode, target);
14278 case IX86_BUILTIN_SHUFPS:
14279 case IX86_BUILTIN_SHUFPD:
14280 icode = (fcode == IX86_BUILTIN_SHUFPS
14281 ? CODE_FOR_sse_shufps
14282 : CODE_FOR_sse2_shufpd);
14283 arg0 = TREE_VALUE (arglist);
14284 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14285 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14286 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14287 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14288 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14289 tmode = insn_data[icode].operand[0].mode;
14290 mode0 = insn_data[icode].operand[1].mode;
14291 mode1 = insn_data[icode].operand[2].mode;
14292 mode2 = insn_data[icode].operand[3].mode;
14294 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14295 op0 = copy_to_mode_reg (mode0, op0);
14296 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14297 op1 = copy_to_mode_reg (mode1, op1);
14298 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14300 /* @@@ better error message */
14301 error ("mask must be an immediate");
14302 return gen_reg_rtx (tmode);
14305 || GET_MODE (target) != tmode
14306 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14307 target = gen_reg_rtx (tmode);
14308 pat = GEN_FCN (icode) (target, op0, op1, op2);
14314 case IX86_BUILTIN_PSHUFW:
14315 case IX86_BUILTIN_PSHUFD:
14316 case IX86_BUILTIN_PSHUFHW:
14317 case IX86_BUILTIN_PSHUFLW:
14318 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14319 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14320 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14321 : CODE_FOR_mmx_pshufw);
14322 arg0 = TREE_VALUE (arglist);
14323 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14324 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14325 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14326 tmode = insn_data[icode].operand[0].mode;
14327 mode1 = insn_data[icode].operand[1].mode;
14328 mode2 = insn_data[icode].operand[2].mode;
14330 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14331 op0 = copy_to_mode_reg (mode1, op0);
14332 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14334 /* @@@ better error message */
14335 error ("mask must be an immediate");
14339 || GET_MODE (target) != tmode
14340 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14341 target = gen_reg_rtx (tmode);
14342 pat = GEN_FCN (icode) (target, op0, op1);
14348 case IX86_BUILTIN_PSLLDQI128:
14349 case IX86_BUILTIN_PSRLDQI128:
14350 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14351 : CODE_FOR_sse2_lshrti3);
14352 arg0 = TREE_VALUE (arglist);
14353 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14354 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14355 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14356 tmode = insn_data[icode].operand[0].mode;
14357 mode1 = insn_data[icode].operand[1].mode;
14358 mode2 = insn_data[icode].operand[2].mode;
14360 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14362 op0 = copy_to_reg (op0);
14363 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14365 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14367 error ("shift must be an immediate");
14370 target = gen_reg_rtx (V2DImode);
14371 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14377 case IX86_BUILTIN_FEMMS:
14378 emit_insn (gen_femms ());
14381 case IX86_BUILTIN_PAVGUSB:
14382 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14384 case IX86_BUILTIN_PF2ID:
14385 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14387 case IX86_BUILTIN_PFACC:
14388 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14390 case IX86_BUILTIN_PFADD:
14391 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14393 case IX86_BUILTIN_PFCMPEQ:
14394 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14396 case IX86_BUILTIN_PFCMPGE:
14397 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14399 case IX86_BUILTIN_PFCMPGT:
14400 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14402 case IX86_BUILTIN_PFMAX:
14403 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14405 case IX86_BUILTIN_PFMIN:
14406 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14408 case IX86_BUILTIN_PFMUL:
14409 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14411 case IX86_BUILTIN_PFRCP:
14412 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14414 case IX86_BUILTIN_PFRCPIT1:
14415 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14417 case IX86_BUILTIN_PFRCPIT2:
14418 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14420 case IX86_BUILTIN_PFRSQIT1:
14421 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14423 case IX86_BUILTIN_PFRSQRT:
14424 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14426 case IX86_BUILTIN_PFSUB:
14427 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14429 case IX86_BUILTIN_PFSUBR:
14430 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14432 case IX86_BUILTIN_PI2FD:
14433 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14435 case IX86_BUILTIN_PMULHRW:
14436 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14438 case IX86_BUILTIN_PF2IW:
14439 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14441 case IX86_BUILTIN_PFNACC:
14442 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14444 case IX86_BUILTIN_PFPNACC:
14445 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14447 case IX86_BUILTIN_PI2FW:
14448 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14450 case IX86_BUILTIN_PSWAPDSI:
14451 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14453 case IX86_BUILTIN_PSWAPDSF:
14454 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14456 case IX86_BUILTIN_SSE_ZERO:
14457 target = gen_reg_rtx (V4SFmode);
14458 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14461 case IX86_BUILTIN_MMX_ZERO:
14462 target = gen_reg_rtx (DImode);
14463 emit_insn (gen_mmx_clrdi (target));
14466 case IX86_BUILTIN_CLRTI:
14467 target = gen_reg_rtx (V2DImode);
14468 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14472 case IX86_BUILTIN_SQRTSD:
14473 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14474 case IX86_BUILTIN_LOADAPD:
14475 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14476 case IX86_BUILTIN_LOADUPD:
14477 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14479 case IX86_BUILTIN_STOREAPD:
14480 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14481 case IX86_BUILTIN_STOREUPD:
14482 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14484 case IX86_BUILTIN_LOADSD:
14485 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14487 case IX86_BUILTIN_STORESD:
14488 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14490 case IX86_BUILTIN_SETPD1:
14491 target = assign_386_stack_local (DFmode, 0);
14492 arg0 = TREE_VALUE (arglist);
14493 emit_move_insn (adjust_address (target, DFmode, 0),
14494 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14495 op0 = gen_reg_rtx (V2DFmode);
14496 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14497 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14500 case IX86_BUILTIN_SETPD:
14501 target = assign_386_stack_local (V2DFmode, 0);
14502 arg0 = TREE_VALUE (arglist);
14503 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14504 emit_move_insn (adjust_address (target, DFmode, 0),
14505 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14506 emit_move_insn (adjust_address (target, DFmode, 8),
14507 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14508 op0 = gen_reg_rtx (V2DFmode);
14509 emit_insn (gen_sse2_movapd (op0, target));
14512 case IX86_BUILTIN_LOADRPD:
14513 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14514 gen_reg_rtx (V2DFmode), 1);
14515 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14518 case IX86_BUILTIN_LOADPD1:
14519 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14520 gen_reg_rtx (V2DFmode), 1);
14521 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14524 case IX86_BUILTIN_STOREPD1:
14525 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14526 case IX86_BUILTIN_STORERPD:
14527 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14529 case IX86_BUILTIN_CLRPD:
14530 target = gen_reg_rtx (V2DFmode);
14531 emit_insn (gen_sse_clrv2df (target));
14534 case IX86_BUILTIN_MFENCE:
14535 emit_insn (gen_sse2_mfence ());
14537 case IX86_BUILTIN_LFENCE:
14538 emit_insn (gen_sse2_lfence ());
14541 case IX86_BUILTIN_CLFLUSH:
14542 arg0 = TREE_VALUE (arglist);
14543 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14544 icode = CODE_FOR_sse2_clflush;
14545 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14546 op0 = copy_to_mode_reg (Pmode, op0);
14548 emit_insn (gen_sse2_clflush (op0));
14551 case IX86_BUILTIN_MOVNTPD:
14552 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14553 case IX86_BUILTIN_MOVNTDQ:
14554 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14555 case IX86_BUILTIN_MOVNTI:
14556 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14558 case IX86_BUILTIN_LOADDQA:
14559 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14560 case IX86_BUILTIN_LOADDQU:
14561 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14562 case IX86_BUILTIN_LOADD:
14563 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14565 case IX86_BUILTIN_STOREDQA:
14566 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14567 case IX86_BUILTIN_STOREDQU:
14568 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14569 case IX86_BUILTIN_STORED:
14570 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14572 case IX86_BUILTIN_MONITOR:
14573 arg0 = TREE_VALUE (arglist);
14574 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14575 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14576 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14577 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14578 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14580 op0 = copy_to_mode_reg (SImode, op0);
14582 op1 = copy_to_mode_reg (SImode, op1);
14584 op2 = copy_to_mode_reg (SImode, op2);
14585 emit_insn (gen_monitor (op0, op1, op2));
14588 case IX86_BUILTIN_MWAIT:
14589 arg0 = TREE_VALUE (arglist);
14590 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14591 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14592 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14594 op0 = copy_to_mode_reg (SImode, op0);
14596 op1 = copy_to_mode_reg (SImode, op1);
14597 emit_insn (gen_mwait (op0, op1));
14600 case IX86_BUILTIN_LOADDDUP:
14601 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14603 case IX86_BUILTIN_LDDQU:
14604 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14611 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14612 if (d->code == fcode)
14614 /* Compares are treated specially. */
14615 if (d->icode == CODE_FOR_maskcmpv4sf3
14616 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14617 || d->icode == CODE_FOR_maskncmpv4sf3
14618 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14619 || d->icode == CODE_FOR_maskcmpv2df3
14620 || d->icode == CODE_FOR_vmmaskcmpv2df3
14621 || d->icode == CODE_FOR_maskncmpv2df3
14622 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14623 return ix86_expand_sse_compare (d, arglist, target);
14625 return ix86_expand_binop_builtin (d->icode, arglist, target);
14628 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14629 if (d->code == fcode)
14630 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14632 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14633 if (d->code == fcode)
14634 return ix86_expand_sse_comi (d, arglist, target);
14636 /* @@@ Should really do something sensible here. */
14640 /* Store OPERAND to the memory after reload is completed. This means
14641 that we can't easily use assign_stack_local. */
14643 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14646 if (!reload_completed)
14648 if (TARGET_RED_ZONE)
14650 result = gen_rtx_MEM (mode,
14651 gen_rtx_PLUS (Pmode,
14653 GEN_INT (-RED_ZONE_SIZE)));
14654 emit_move_insn (result, operand);
14656 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14662 operand = gen_lowpart (DImode, operand);
14666 gen_rtx_SET (VOIDmode,
14667 gen_rtx_MEM (DImode,
14668 gen_rtx_PRE_DEC (DImode,
14669 stack_pointer_rtx)),
14675 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14684 split_di (&operand, 1, operands, operands + 1);
14686 gen_rtx_SET (VOIDmode,
14687 gen_rtx_MEM (SImode,
14688 gen_rtx_PRE_DEC (Pmode,
14689 stack_pointer_rtx)),
14692 gen_rtx_SET (VOIDmode,
14693 gen_rtx_MEM (SImode,
14694 gen_rtx_PRE_DEC (Pmode,
14695 stack_pointer_rtx)),
14700 /* It is better to store HImodes as SImodes. */
14701 if (!TARGET_PARTIAL_REG_STALL)
14702 operand = gen_lowpart (SImode, operand);
14706 gen_rtx_SET (VOIDmode,
14707 gen_rtx_MEM (GET_MODE (operand),
14708 gen_rtx_PRE_DEC (SImode,
14709 stack_pointer_rtx)),
14715 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14720 /* Free operand from the memory. */
14722 ix86_free_from_memory (enum machine_mode mode)
14724 if (!TARGET_RED_ZONE)
14728 if (mode == DImode || TARGET_64BIT)
14730 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14734 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14735 to pop or add instruction if registers are available. */
14736 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14737 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14742 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14743 QImode must go into class Q_REGS.
14744 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14745 movdf to do mem-to-mem moves through integer regs. */
14747 ix86_preferred_reload_class (rtx x, enum reg_class class)
14749 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14751 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14753 /* SSE can't load any constant directly yet. */
14754 if (SSE_CLASS_P (class))
14756 /* Floats can load 0 and 1. */
14757 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14759 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14760 if (MAYBE_SSE_CLASS_P (class))
14761 return (reg_class_subset_p (class, GENERAL_REGS)
14762 ? GENERAL_REGS : FLOAT_REGS);
14766 /* General regs can load everything. */
14767 if (reg_class_subset_p (class, GENERAL_REGS))
14768 return GENERAL_REGS;
14769 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14770 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14773 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14775 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14780 /* If we are copying between general and FP registers, we need a memory
14781 location. The same is true for SSE and MMX registers.
14783 The macro can't work reliably when one of the CLASSES is class containing
14784 registers from multiple units (SSE, MMX, integer). We avoid this by never
14785 combining those units in single alternative in the machine description.
14786 Ensure that this constraint holds to avoid unexpected surprises.
14788 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14789 enforce these sanity checks. */
14791 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14792 enum machine_mode mode, int strict)
14794 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14795 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14796 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14797 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14798 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14799 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14806 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14807 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14808 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14809 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14810 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14812 /* Return the cost of moving data from a register in class CLASS1 to
14813 one in class CLASS2.
14815 It is not required that the cost always equal 2 when FROM is the same as TO;
14816 on some machines it is expensive to move between registers if they are not
14817 general registers. */
14819 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14820 enum reg_class class2)
14822 /* In case we require secondary memory, compute cost of the store followed
14823 by load. In order to avoid bad register allocation choices, we need
14824 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14826 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14830 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14831 MEMORY_MOVE_COST (mode, class1, 1));
14832 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14833 MEMORY_MOVE_COST (mode, class2, 1));
14835 /* In case of copying from general_purpose_register we may emit multiple
14836 stores followed by single load causing memory size mismatch stall.
14837 Count this as arbitrarily high cost of 20. */
14838 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14841 /* In the case of FP/MMX moves, the registers actually overlap, and we
14842 have to switch modes in order to treat them differently. */
14843 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14844 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14850 /* Moves between SSE/MMX and integer unit are expensive. */
14851 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14852 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14853 return ix86_cost->mmxsse_to_integer;
14854 if (MAYBE_FLOAT_CLASS_P (class1))
14855 return ix86_cost->fp_move;
14856 if (MAYBE_SSE_CLASS_P (class1))
14857 return ix86_cost->sse_move;
14858 if (MAYBE_MMX_CLASS_P (class1))
14859 return ix86_cost->mmx_move;
14863 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14865 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14867 /* Flags and only flags can only hold CCmode values. */
14868 if (CC_REGNO_P (regno))
14869 return GET_MODE_CLASS (mode) == MODE_CC;
14870 if (GET_MODE_CLASS (mode) == MODE_CC
14871 || GET_MODE_CLASS (mode) == MODE_RANDOM
14872 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14874 if (FP_REGNO_P (regno))
14875 return VALID_FP_MODE_P (mode);
14876 if (SSE_REGNO_P (regno))
14877 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14878 if (MMX_REGNO_P (regno))
14880 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14881 /* We handle both integer and floats in the general purpose registers.
14882 In future we should be able to handle vector modes as well. */
14883 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14885 /* Take care for QImode values - they can be in non-QI regs, but then
14886 they do cause partial register stalls. */
14887 if (regno < 4 || mode != QImode || TARGET_64BIT)
14889 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14892 /* Return the cost of moving data of mode M between a
14893 register and memory. A value of 2 is the default; this cost is
14894 relative to those in `REGISTER_MOVE_COST'.
14896 If moving between registers and memory is more expensive than
14897 between two registers, you should define this macro to express the
14900 Model also increased moving costs of QImode registers in non
14904 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14906 if (FLOAT_CLASS_P (class))
14923 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14925 if (SSE_CLASS_P (class))
14928 switch (GET_MODE_SIZE (mode))
14942 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14944 if (MMX_CLASS_P (class))
14947 switch (GET_MODE_SIZE (mode))
14958 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14960 switch (GET_MODE_SIZE (mode))
14964 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14965 : ix86_cost->movzbl_load);
14967 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14968 : ix86_cost->int_store[0] + 4);
14971 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14973 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14974 if (mode == TFmode)
14976 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14977 * (((int) GET_MODE_SIZE (mode)
14978 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14982 /* Compute a (partial) cost for rtx X. Return true if the complete
14983 cost has been computed, and false if subexpressions should be
14984 scanned. In either case, *TOTAL contains the cost result. */
14987 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14989 enum machine_mode mode = GET_MODE (x);
14997 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14999 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15001 else if (flag_pic && SYMBOLIC_CONST (x)
15003 || (!GET_CODE (x) != LABEL_REF
15004 && (GET_CODE (x) != SYMBOL_REF
15005 || !SYMBOL_REF_LOCAL_P (x)))))
15012 if (mode == VOIDmode)
15015 switch (standard_80387_constant_p (x))
15020 default: /* Other constants */
15025 /* Start with (MEM (SYMBOL_REF)), since that's where
15026 it'll probably end up. Add a penalty for size. */
15027 *total = (COSTS_N_INSNS (1)
15028 + (flag_pic != 0 && !TARGET_64BIT)
15029 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15035 /* The zero extensions is often completely free on x86_64, so make
15036 it as cheap as possible. */
15037 if (TARGET_64BIT && mode == DImode
15038 && GET_MODE (XEXP (x, 0)) == SImode)
15040 else if (TARGET_ZERO_EXTEND_WITH_AND)
15041 *total = COSTS_N_INSNS (ix86_cost->add);
15043 *total = COSTS_N_INSNS (ix86_cost->movzx);
15047 *total = COSTS_N_INSNS (ix86_cost->movsx);
15051 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15052 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15054 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15057 *total = COSTS_N_INSNS (ix86_cost->add);
15060 if ((value == 2 || value == 3)
15061 && !TARGET_DECOMPOSE_LEA
15062 && ix86_cost->lea <= ix86_cost->shift_const)
15064 *total = COSTS_N_INSNS (ix86_cost->lea);
15074 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15076 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15078 if (INTVAL (XEXP (x, 1)) > 32)
15079 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15081 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15085 if (GET_CODE (XEXP (x, 1)) == AND)
15086 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15088 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15093 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15094 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15096 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15101 if (FLOAT_MODE_P (mode))
15102 *total = COSTS_N_INSNS (ix86_cost->fmul);
15103 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15105 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15108 for (nbits = 0; value != 0; value >>= 1)
15111 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15112 + nbits * ix86_cost->mult_bit);
15116 /* This is arbitrary */
15117 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15118 + 7 * ix86_cost->mult_bit);
15126 if (FLOAT_MODE_P (mode))
15127 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15129 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15133 if (FLOAT_MODE_P (mode))
15134 *total = COSTS_N_INSNS (ix86_cost->fadd);
15135 else if (!TARGET_DECOMPOSE_LEA
15136 && GET_MODE_CLASS (mode) == MODE_INT
15137 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15139 if (GET_CODE (XEXP (x, 0)) == PLUS
15140 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15141 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15142 && CONSTANT_P (XEXP (x, 1)))
15144 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15145 if (val == 2 || val == 4 || val == 8)
15147 *total = COSTS_N_INSNS (ix86_cost->lea);
15148 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15149 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15151 *total += rtx_cost (XEXP (x, 1), outer_code);
15155 else if (GET_CODE (XEXP (x, 0)) == MULT
15156 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15158 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15159 if (val == 2 || val == 4 || val == 8)
15161 *total = COSTS_N_INSNS (ix86_cost->lea);
15162 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15163 *total += rtx_cost (XEXP (x, 1), outer_code);
15167 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15169 *total = COSTS_N_INSNS (ix86_cost->lea);
15170 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15171 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15172 *total += rtx_cost (XEXP (x, 1), outer_code);
15179 if (FLOAT_MODE_P (mode))
15181 *total = COSTS_N_INSNS (ix86_cost->fadd);
15189 if (!TARGET_64BIT && mode == DImode)
15191 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15192 + (rtx_cost (XEXP (x, 0), outer_code)
15193 << (GET_MODE (XEXP (x, 0)) != DImode))
15194 + (rtx_cost (XEXP (x, 1), outer_code)
15195 << (GET_MODE (XEXP (x, 1)) != DImode)));
15201 if (FLOAT_MODE_P (mode))
15203 *total = COSTS_N_INSNS (ix86_cost->fchs);
15209 if (!TARGET_64BIT && mode == DImode)
15210 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15212 *total = COSTS_N_INSNS (ix86_cost->add);
15216 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15221 if (FLOAT_MODE_P (mode))
15222 *total = COSTS_N_INSNS (ix86_cost->fabs);
15226 if (FLOAT_MODE_P (mode))
15227 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15231 if (XINT (x, 1) == UNSPEC_TP)
15240 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15242 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15245 fputs ("\tpushl $", asm_out_file);
15246 assemble_name (asm_out_file, XSTR (symbol, 0));
15247 fputc ('\n', asm_out_file);
15253 static int current_machopic_label_num;
15255 /* Given a symbol name and its associated stub, write out the
15256 definition of the stub. */
15259 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15261 unsigned int length;
15262 char *binder_name, *symbol_name, lazy_ptr_name[32];
15263 int label = ++current_machopic_label_num;
15265 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15266 symb = (*targetm.strip_name_encoding) (symb);
15268 length = strlen (stub);
15269 binder_name = alloca (length + 32);
15270 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15272 length = strlen (symb);
15273 symbol_name = alloca (length + 32);
15274 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15276 sprintf (lazy_ptr_name, "L%d$lz", label);
15279 machopic_picsymbol_stub_section ();
15281 machopic_symbol_stub_section ();
15283 fprintf (file, "%s:\n", stub);
15284 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15288 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15289 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15290 fprintf (file, "\tjmp %%edx\n");
15293 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15295 fprintf (file, "%s:\n", binder_name);
15299 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15300 fprintf (file, "\tpushl %%eax\n");
15303 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15305 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15307 machopic_lazy_symbol_ptr_section ();
15308 fprintf (file, "%s:\n", lazy_ptr_name);
15309 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15310 fprintf (file, "\t.long %s\n", binder_name);
15312 #endif /* TARGET_MACHO */
15314 /* Order the registers for register allocator. */
15317 x86_order_regs_for_local_alloc (void)
15322 /* First allocate the local general purpose registers. */
15323 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15324 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15325 reg_alloc_order [pos++] = i;
15327 /* Global general purpose registers. */
15328 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15329 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15330 reg_alloc_order [pos++] = i;
15332 /* x87 registers come first in case we are doing FP math
15334 if (!TARGET_SSE_MATH)
15335 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15336 reg_alloc_order [pos++] = i;
15338 /* SSE registers. */
15339 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15340 reg_alloc_order [pos++] = i;
15341 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15342 reg_alloc_order [pos++] = i;
15344 /* x87 registers. */
15345 if (TARGET_SSE_MATH)
15346 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15347 reg_alloc_order [pos++] = i;
15349 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15350 reg_alloc_order [pos++] = i;
15352 /* Initialize the rest of array as we do not allocate some registers
15354 while (pos < FIRST_PSEUDO_REGISTER)
15355 reg_alloc_order [pos++] = 0;
15358 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15359 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15362 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15363 struct attribute_spec.handler. */
15365 ix86_handle_struct_attribute (tree *node, tree name,
15366 tree args ATTRIBUTE_UNUSED,
15367 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15370 if (DECL_P (*node))
15372 if (TREE_CODE (*node) == TYPE_DECL)
15373 type = &TREE_TYPE (*node);
15378 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15379 || TREE_CODE (*type) == UNION_TYPE)))
15381 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15382 *no_add_attrs = true;
15385 else if ((is_attribute_p ("ms_struct", name)
15386 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15387 || ((is_attribute_p ("gcc_struct", name)
15388 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15390 warning ("`%s' incompatible attribute ignored",
15391 IDENTIFIER_POINTER (name));
15392 *no_add_attrs = true;
15399 ix86_ms_bitfield_layout_p (tree record_type)
15401 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15402 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15403 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15406 /* Returns an expression indicating where the this parameter is
15407 located on entry to the FUNCTION. */
15410 x86_this_parameter (tree function)
15412 tree type = TREE_TYPE (function);
15416 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15417 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15420 if (ix86_function_regparm (type, function) > 0)
15424 parm = TYPE_ARG_TYPES (type);
15425 /* Figure out whether or not the function has a variable number of
15427 for (; parm; parm = TREE_CHAIN (parm))
15428 if (TREE_VALUE (parm) == void_type_node)
15430 /* If not, the this parameter is in the first argument. */
15434 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15436 return gen_rtx_REG (SImode, regno);
15440 if (aggregate_value_p (TREE_TYPE (type), type))
15441 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15443 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15446 /* Determine whether x86_output_mi_thunk can succeed. */
15449 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15450 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15451 HOST_WIDE_INT vcall_offset, tree function)
15453 /* 64-bit can handle anything. */
15457 /* For 32-bit, everything's fine if we have one free register. */
15458 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15461 /* Need a free register for vcall_offset. */
15465 /* Need a free register for GOT references. */
15466 if (flag_pic && !(*targetm.binds_local_p) (function))
15469 /* Otherwise ok. */
15473 /* Output the assembler code for a thunk function. THUNK_DECL is the
15474 declaration for the thunk function itself, FUNCTION is the decl for
15475 the target function. DELTA is an immediate constant offset to be
15476 added to THIS. If VCALL_OFFSET is nonzero, the word at
15477 *(*this + vcall_offset) should be added to THIS. */
15480 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15481 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15482 HOST_WIDE_INT vcall_offset, tree function)
15485 rtx this = x86_this_parameter (function);
15488 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15489 pull it in now and let DELTA benefit. */
15492 else if (vcall_offset)
15494 /* Put the this parameter into %eax. */
15496 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15497 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15500 this_reg = NULL_RTX;
15502 /* Adjust the this parameter by a fixed constant. */
15505 xops[0] = GEN_INT (delta);
15506 xops[1] = this_reg ? this_reg : this;
15509 if (!x86_64_general_operand (xops[0], DImode))
15511 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15513 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15517 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15520 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15523 /* Adjust the this parameter by a value stored in the vtable. */
15527 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15530 int tmp_regno = 2 /* ECX */;
15531 if (lookup_attribute ("fastcall",
15532 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15533 tmp_regno = 0 /* EAX */;
15534 tmp = gen_rtx_REG (SImode, tmp_regno);
15537 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15540 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15542 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15544 /* Adjust the this parameter. */
15545 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15546 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15548 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15549 xops[0] = GEN_INT (vcall_offset);
15551 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15552 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15554 xops[1] = this_reg;
15556 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15558 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15561 /* If necessary, drop THIS back to its stack slot. */
15562 if (this_reg && this_reg != this)
15564 xops[0] = this_reg;
15566 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15569 xops[0] = XEXP (DECL_RTL (function), 0);
15572 if (!flag_pic || (*targetm.binds_local_p) (function))
15573 output_asm_insn ("jmp\t%P0", xops);
15576 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15577 tmp = gen_rtx_CONST (Pmode, tmp);
15578 tmp = gen_rtx_MEM (QImode, tmp);
15580 output_asm_insn ("jmp\t%A0", xops);
15585 if (!flag_pic || (*targetm.binds_local_p) (function))
15586 output_asm_insn ("jmp\t%P0", xops);
15591 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15592 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15593 tmp = gen_rtx_MEM (QImode, tmp);
15595 output_asm_insn ("jmp\t%0", xops);
15598 #endif /* TARGET_MACHO */
15600 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15601 output_set_got (tmp);
15604 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15605 output_asm_insn ("jmp\t{*}%1", xops);
15611 x86_file_start (void)
15613 default_file_start ();
15614 if (X86_FILE_START_VERSION_DIRECTIVE)
15615 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15616 if (X86_FILE_START_FLTUSED)
15617 fputs ("\t.global\t__fltused\n", asm_out_file);
15618 if (ix86_asm_dialect == ASM_INTEL)
15619 fputs ("\t.intel_syntax\n", asm_out_file);
15623 x86_field_alignment (tree field, int computed)
15625 enum machine_mode mode;
15626 tree type = TREE_TYPE (field);
15628 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15630 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15631 ? get_inner_array_type (type) : type);
15632 if (mode == DFmode || mode == DCmode
15633 || GET_MODE_CLASS (mode) == MODE_INT
15634 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15635 return MIN (32, computed);
15639 /* Output assembler code to FILE to increment profiler label # LABELNO
15640 for profiling a function entry. */
15642 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15647 #ifndef NO_PROFILE_COUNTERS
15648 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15650 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15654 #ifndef NO_PROFILE_COUNTERS
15655 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15657 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15661 #ifndef NO_PROFILE_COUNTERS
15662 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15663 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15665 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15669 #ifndef NO_PROFILE_COUNTERS
15670 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15671 PROFILE_COUNT_REGISTER);
15673 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15677 /* We don't have exact information about the insn sizes, but we may assume
15678 quite safely that we are informed about all 1 byte insns and memory
15679 address sizes. This is enough to eliminate unnecessary padding in
15683 min_insn_size (rtx insn)
15687 if (!INSN_P (insn) || !active_insn_p (insn))
15690 /* Discard alignments we've emit and jump instructions. */
15691 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15692 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15694 if (GET_CODE (insn) == JUMP_INSN
15695 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15696 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15699 /* Important case - calls are always 5 bytes.
15700 It is common to have many calls in the row. */
15701 if (GET_CODE (insn) == CALL_INSN
15702 && symbolic_reference_mentioned_p (PATTERN (insn))
15703 && !SIBLING_CALL_P (insn))
15705 if (get_attr_length (insn) <= 1)
15708 /* For normal instructions we may rely on the sizes of addresses
15709 and the presence of symbol to require 4 bytes of encoding.
15710 This is not the case for jumps where references are PC relative. */
15711 if (GET_CODE (insn) != JUMP_INSN)
15713 l = get_attr_length_address (insn);
15714 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15723 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15727 k8_avoid_jump_misspredicts (void)
15729 rtx insn, start = get_insns ();
15730 int nbytes = 0, njumps = 0;
15733 /* Look for all minimal intervals of instructions containing 4 jumps.
15734 The intervals are bounded by START and INSN. NBYTES is the total
15735 size of instructions in the interval including INSN and not including
15736 START. When the NBYTES is smaller than 16 bytes, it is possible
15737 that the end of START and INSN ends up in the same 16byte page.
15739 The smallest offset in the page INSN can start is the case where START
15740 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15741 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15743 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15746 nbytes += min_insn_size (insn);
15748 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15749 INSN_UID (insn), min_insn_size (insn));
15750 if ((GET_CODE (insn) == JUMP_INSN
15751 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15752 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15753 || GET_CODE (insn) == CALL_INSN)
15760 start = NEXT_INSN (start);
15761 if ((GET_CODE (start) == JUMP_INSN
15762 && GET_CODE (PATTERN (start)) != ADDR_VEC
15763 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15764 || GET_CODE (start) == CALL_INSN)
15765 njumps--, isjump = 1;
15768 nbytes -= min_insn_size (start);
15773 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15774 INSN_UID (start), INSN_UID (insn), nbytes);
15776 if (njumps == 3 && isjump && nbytes < 16)
15778 int padsize = 15 - nbytes + min_insn_size (insn);
15781 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15782 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15787 /* Implement machine specific optimizations.
15788 At the moment we implement single transformation: AMD Athlon works faster
15789 when RET is not destination of conditional jump or directly preceded
15790 by other jump instruction. We avoid the penalty by inserting NOP just
15791 before the RET instructions in such cases. */
15797 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15799 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15801 basic_block bb = e->src;
15802 rtx ret = BB_END (bb);
15804 bool replace = false;
15806 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15807 || !maybe_hot_bb_p (bb))
15809 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15810 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15812 if (prev && GET_CODE (prev) == CODE_LABEL)
15815 for (e = bb->pred; e; e = e->pred_next)
15816 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15817 && !(e->flags & EDGE_FALLTHRU))
15822 prev = prev_active_insn (ret);
15824 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15825 || GET_CODE (prev) == CALL_INSN))
15827 /* Empty functions get branch mispredict even when the jump destination
15828 is not visible to us. */
15829 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15834 emit_insn_before (gen_return_internal_long (), ret);
15838 k8_avoid_jump_misspredicts ();
15841 /* Return nonzero when QImode register that must be represented via REX prefix
15844 x86_extended_QIreg_mentioned_p (rtx insn)
15847 extract_insn_cached (insn);
15848 for (i = 0; i < recog_data.n_operands; i++)
15849 if (REG_P (recog_data.operand[i])
15850 && REGNO (recog_data.operand[i]) >= 4)
15855 /* Return nonzero when P points to register encoded via REX prefix.
15856 Called via for_each_rtx. */
15858 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15860 unsigned int regno;
15863 regno = REGNO (*p);
15864 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15867 /* Return true when INSN mentions register that must be encoded using REX
15870 x86_extended_reg_mentioned_p (rtx insn)
15872 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15875 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15876 optabs would emit if we didn't have TFmode patterns. */
15879 x86_emit_floatuns (rtx operands[2])
15881 rtx neglab, donelab, i0, i1, f0, in, out;
15882 enum machine_mode mode, inmode;
15884 inmode = GET_MODE (operands[1]);
15885 if (inmode != SImode
15886 && inmode != DImode)
15890 in = force_reg (inmode, operands[1]);
15891 mode = GET_MODE (out);
15892 neglab = gen_label_rtx ();
15893 donelab = gen_label_rtx ();
15894 i1 = gen_reg_rtx (Pmode);
15895 f0 = gen_reg_rtx (mode);
15897 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15899 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15900 emit_jump_insn (gen_jump (donelab));
15903 emit_label (neglab);
15905 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15906 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15907 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15908 expand_float (f0, i0, 0);
15909 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15911 emit_label (donelab);
15914 /* Return if we do not know how to pass TYPE solely in registers. */
15916 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15918 if (default_must_pass_in_stack (mode, type))
15920 return (!TARGET_64BIT && type && mode == TImode);
15923 /* Initialize vector TARGET via VALS. */
15925 ix86_expand_vector_init (rtx target, rtx vals)
15927 enum machine_mode mode = GET_MODE (target);
15928 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15929 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15932 for (i = n_elts - 1; i >= 0; i--)
15933 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15934 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15937 /* Few special cases first...
15938 ... constants are best loaded from constant pool. */
15941 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15945 /* ... values where only first field is non-constant are best loaded
15946 from the pool and overwriten via move later. */
15949 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15950 GET_MODE_INNER (mode), 0);
15952 op = force_reg (mode, op);
15953 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15954 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15955 switch (GET_MODE (target))
15958 emit_insn (gen_sse2_movsd (target, target, op));
15961 emit_insn (gen_sse_movss (target, target, op));
15969 /* And the busy sequence doing rotations. */
15970 switch (GET_MODE (target))
15975 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15977 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15979 vecop0 = force_reg (V2DFmode, vecop0);
15980 vecop1 = force_reg (V2DFmode, vecop1);
15981 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15987 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15989 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15991 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15993 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15994 rtx tmp1 = gen_reg_rtx (V4SFmode);
15995 rtx tmp2 = gen_reg_rtx (V4SFmode);
15997 vecop0 = force_reg (V4SFmode, vecop0);
15998 vecop1 = force_reg (V4SFmode, vecop1);
15999 vecop2 = force_reg (V4SFmode, vecop2);
16000 vecop3 = force_reg (V4SFmode, vecop3);
16001 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
16002 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
16003 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
16011 #include "gt-i386.h"