1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
730 enum cmodel ix86_cmodel;
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
796 static rtx get_thread_pointer (int);
797 static rtx legitimize_tls_address (rtx, enum tls_model, int);
798 static void get_pc_thunk_name (char [32], unsigned int);
799 static rtx gen_push (rtx);
800 static int memory_address_length (rtx addr);
801 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
802 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
803 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
804 static void ix86_dump_ppro_packet (FILE *);
805 static void ix86_reorder_insn (rtx *, rtx *);
806 static struct machine_function * ix86_init_machine_status (void);
807 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
808 static int ix86_nsaved_regs (void);
809 static void ix86_emit_save_regs (void);
810 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
811 static void ix86_emit_restore_regs_using_mov (rtx, int, int);
812 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
813 static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
814 static void ix86_sched_reorder_ppro (rtx *, rtx *);
815 static HOST_WIDE_INT ix86_GOT_alias_set (void);
816 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
817 static rtx ix86_expand_aligntest (rtx, int);
818 static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
819 static int ix86_issue_rate (void);
820 static int ix86_adjust_cost (rtx, rtx, rtx, int);
821 static void ix86_sched_init (FILE *, int, int);
822 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
823 static int ix86_variable_issue (FILE *, int, rtx, int);
824 static int ia32_use_dfa_pipeline_interface (void);
825 static int ia32_multipass_dfa_lookahead (void);
826 static void ix86_init_mmx_sse_builtins (void);
827 static rtx x86_this_parameter (tree);
828 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
829 HOST_WIDE_INT, tree);
830 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
831 static void x86_file_start (void);
832 static void ix86_reorg (void);
833 bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
837 rtx base, index, disp;
839 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
842 static int ix86_decompose_address (rtx, struct ix86_address *);
843 static int ix86_address_cost (rtx);
844 static bool ix86_cannot_force_const_mem (rtx);
845 static rtx ix86_delegitimize_address (rtx);
847 struct builtin_description;
848 static rtx ix86_expand_sse_comi (const struct builtin_description *,
850 static rtx ix86_expand_sse_compare (const struct builtin_description *,
852 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
853 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
854 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
855 static rtx ix86_expand_store_builtin (enum insn_code, tree);
856 static rtx safe_vector_operand (rtx, enum machine_mode);
857 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
858 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
859 enum rtx_code *, enum rtx_code *);
860 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
861 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
862 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
863 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
864 static int ix86_fp_comparison_cost (enum rtx_code code);
865 static unsigned int ix86_select_alt_pic_regnum (void);
866 static int ix86_save_reg (unsigned int, int);
867 static void ix86_compute_frame_layout (struct ix86_frame *);
868 static int ix86_comp_type_attributes (tree, tree);
869 static int ix86_fntype_regparm (tree);
870 const struct attribute_spec ix86_attribute_table[];
871 static bool ix86_function_ok_for_sibcall (tree, tree);
872 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
873 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
874 static int ix86_value_regno (enum machine_mode);
875 static bool contains_128bit_aligned_vector_p (tree);
876 static bool ix86_ms_bitfield_layout_p (tree);
877 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
878 static int extended_reg_mentioned_1 (rtx *, void *);
879 static bool ix86_rtx_costs (rtx, int, int, int *);
880 static int min_insn_size (rtx);
881 static void k8_avoid_jump_misspredicts (void);
883 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
884 static void ix86_svr3_asm_out_constructor (rtx, int);
887 /* Register class used for passing given 64bit part of the argument.
888 These represent classes as documented by the PS ABI, with the exception
889 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
890 use SF or DFmode move instead of DImode to avoid reformatting penalties.
892 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
893 whenever possible (upper half does contain padding).
895 enum x86_64_reg_class
898 X86_64_INTEGER_CLASS,
899 X86_64_INTEGERSI_CLASS,
908 static const char * const x86_64_reg_class_name[] =
909 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
911 #define MAX_CLASSES 4
912 static int classify_argument (enum machine_mode, tree,
913 enum x86_64_reg_class [MAX_CLASSES], int);
914 static int examine_argument (enum machine_mode, tree, int, int *, int *);
915 static rtx construct_container (enum machine_mode, tree, int, int, int,
917 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
918 enum x86_64_reg_class);
920 /* Table of constants used by fldpi, fldln2, etc... */
921 static REAL_VALUE_TYPE ext_80387_constants_table [5];
922 static bool ext_80387_constants_init = 0;
923 static void init_ext_80387_constants (void);
925 /* Initialize the GCC target structure. */
926 #undef TARGET_ATTRIBUTE_TABLE
927 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
928 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
929 # undef TARGET_MERGE_DECL_ATTRIBUTES
930 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
933 #undef TARGET_COMP_TYPE_ATTRIBUTES
934 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
936 #undef TARGET_INIT_BUILTINS
937 #define TARGET_INIT_BUILTINS ix86_init_builtins
939 #undef TARGET_EXPAND_BUILTIN
940 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
942 #undef TARGET_ASM_FUNCTION_EPILOGUE
943 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
945 #undef TARGET_ASM_OPEN_PAREN
946 #define TARGET_ASM_OPEN_PAREN ""
947 #undef TARGET_ASM_CLOSE_PAREN
948 #define TARGET_ASM_CLOSE_PAREN ""
950 #undef TARGET_ASM_ALIGNED_HI_OP
951 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
952 #undef TARGET_ASM_ALIGNED_SI_OP
953 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
955 #undef TARGET_ASM_ALIGNED_DI_OP
956 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
959 #undef TARGET_ASM_UNALIGNED_HI_OP
960 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
961 #undef TARGET_ASM_UNALIGNED_SI_OP
962 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
963 #undef TARGET_ASM_UNALIGNED_DI_OP
964 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
966 #undef TARGET_SCHED_ADJUST_COST
967 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
968 #undef TARGET_SCHED_ISSUE_RATE
969 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
970 #undef TARGET_SCHED_VARIABLE_ISSUE
971 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
972 #undef TARGET_SCHED_INIT
973 #define TARGET_SCHED_INIT ix86_sched_init
974 #undef TARGET_SCHED_REORDER
975 #define TARGET_SCHED_REORDER ix86_sched_reorder
976 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
977 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
978 ia32_use_dfa_pipeline_interface
979 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
980 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
981 ia32_multipass_dfa_lookahead
983 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
984 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
987 #undef TARGET_HAVE_TLS
988 #define TARGET_HAVE_TLS true
990 #undef TARGET_CANNOT_FORCE_CONST_MEM
991 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
993 #undef TARGET_DELEGITIMIZE_ADDRESS
994 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
996 #undef TARGET_MS_BITFIELD_LAYOUT_P
997 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
999 #undef TARGET_ASM_OUTPUT_MI_THUNK
1000 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1001 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1002 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1004 #undef TARGET_ASM_FILE_START
1005 #define TARGET_ASM_FILE_START x86_file_start
1007 #undef TARGET_RTX_COSTS
1008 #define TARGET_RTX_COSTS ix86_rtx_costs
1009 #undef TARGET_ADDRESS_COST
1010 #define TARGET_ADDRESS_COST ix86_address_cost
1012 #undef TARGET_MACHINE_DEPENDENT_REORG
1013 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1015 struct gcc_target targetm = TARGET_INITIALIZER;
1017 /* The svr4 ABI for the i386 says that records and unions are returned
1019 #ifndef DEFAULT_PCC_STRUCT_RETURN
1020 #define DEFAULT_PCC_STRUCT_RETURN 1
1023 /* Sometimes certain combinations of command options do not make
1024 sense on a particular target machine. You can define a macro
1025 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1026 defined, is executed once just after all the command options have
1029 Don't use this macro to turn on various extra optimizations for
1030 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1033 override_options (void)
1036 /* Comes from final.c -- no real reason to change it. */
1037 #define MAX_CODE_ALIGN 16
1041 const struct processor_costs *cost; /* Processor costs */
1042 const int target_enable; /* Target flags to enable. */
1043 const int target_disable; /* Target flags to disable. */
1044 const int align_loop; /* Default alignments. */
1045 const int align_loop_max_skip;
1046 const int align_jump;
1047 const int align_jump_max_skip;
1048 const int align_func;
1050 const processor_target_table[PROCESSOR_max] =
1052 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1053 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1054 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1055 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1056 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1057 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1058 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1059 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1062 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1065 const char *const name; /* processor name or nickname. */
1066 const enum processor_type processor;
1067 const enum pta_flags
1072 PTA_PREFETCH_SSE = 8,
1078 const processor_alias_table[] =
1080 {"i386", PROCESSOR_I386, 0},
1081 {"i486", PROCESSOR_I486, 0},
1082 {"i586", PROCESSOR_PENTIUM, 0},
1083 {"pentium", PROCESSOR_PENTIUM, 0},
1084 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1085 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1086 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1087 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1088 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1089 {"i686", PROCESSOR_PENTIUMPRO, 0},
1090 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1091 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1092 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1093 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1094 PTA_MMX | PTA_PREFETCH_SSE},
1095 {"k6", PROCESSOR_K6, PTA_MMX},
1096 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1097 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1098 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1100 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1101 | PTA_3DNOW | PTA_3DNOW_A},
1102 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1103 | PTA_3DNOW_A | PTA_SSE},
1104 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1105 | PTA_3DNOW_A | PTA_SSE},
1106 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1107 | PTA_3DNOW_A | PTA_SSE},
1108 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1109 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1112 int const pta_size = ARRAY_SIZE (processor_alias_table);
1114 /* By default our XFmode is the 80-bit extended format. If we have
1115 use TFmode instead, it's also the 80-bit format, but with padding. */
1116 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1117 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1119 /* Set the default values for switches whose default depends on TARGET_64BIT
1120 in case they weren't overwritten by command line options. */
1123 if (flag_omit_frame_pointer == 2)
1124 flag_omit_frame_pointer = 1;
1125 if (flag_asynchronous_unwind_tables == 2)
1126 flag_asynchronous_unwind_tables = 1;
1127 if (flag_pcc_struct_return == 2)
1128 flag_pcc_struct_return = 0;
1132 if (flag_omit_frame_pointer == 2)
1133 flag_omit_frame_pointer = 0;
1134 if (flag_asynchronous_unwind_tables == 2)
1135 flag_asynchronous_unwind_tables = 0;
1136 if (flag_pcc_struct_return == 2)
1137 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1140 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1141 SUBTARGET_OVERRIDE_OPTIONS;
1144 if (!ix86_tune_string && ix86_arch_string)
1145 ix86_tune_string = ix86_arch_string;
1146 if (!ix86_tune_string)
1147 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1148 if (!ix86_arch_string)
1149 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1151 if (ix86_cmodel_string != 0)
1153 if (!strcmp (ix86_cmodel_string, "small"))
1154 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1156 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1157 else if (!strcmp (ix86_cmodel_string, "32"))
1158 ix86_cmodel = CM_32;
1159 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1160 ix86_cmodel = CM_KERNEL;
1161 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1162 ix86_cmodel = CM_MEDIUM;
1163 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1164 ix86_cmodel = CM_LARGE;
1166 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1170 ix86_cmodel = CM_32;
1172 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1174 if (ix86_asm_string != 0)
1176 if (!strcmp (ix86_asm_string, "intel"))
1177 ix86_asm_dialect = ASM_INTEL;
1178 else if (!strcmp (ix86_asm_string, "att"))
1179 ix86_asm_dialect = ASM_ATT;
1181 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1183 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1184 error ("code model `%s' not supported in the %s bit mode",
1185 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1186 if (ix86_cmodel == CM_LARGE)
1187 sorry ("code model `large' not supported yet");
1188 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1189 sorry ("%i-bit mode not compiled in",
1190 (target_flags & MASK_64BIT) ? 64 : 32);
1192 for (i = 0; i < pta_size; i++)
1193 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1195 ix86_arch = processor_alias_table[i].processor;
1196 /* Default cpu tuning to the architecture. */
1197 ix86_tune = ix86_arch;
1198 if (processor_alias_table[i].flags & PTA_MMX
1199 && !(target_flags_explicit & MASK_MMX))
1200 target_flags |= MASK_MMX;
1201 if (processor_alias_table[i].flags & PTA_3DNOW
1202 && !(target_flags_explicit & MASK_3DNOW))
1203 target_flags |= MASK_3DNOW;
1204 if (processor_alias_table[i].flags & PTA_3DNOW_A
1205 && !(target_flags_explicit & MASK_3DNOW_A))
1206 target_flags |= MASK_3DNOW_A;
1207 if (processor_alias_table[i].flags & PTA_SSE
1208 && !(target_flags_explicit & MASK_SSE))
1209 target_flags |= MASK_SSE;
1210 if (processor_alias_table[i].flags & PTA_SSE2
1211 && !(target_flags_explicit & MASK_SSE2))
1212 target_flags |= MASK_SSE2;
1213 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1214 x86_prefetch_sse = true;
1215 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1216 error ("CPU you selected does not support x86-64 instruction set");
1221 error ("bad value (%s) for -march= switch", ix86_arch_string);
1223 for (i = 0; i < pta_size; i++)
1224 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1226 ix86_tune = processor_alias_table[i].processor;
1227 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1228 error ("CPU you selected does not support x86-64 instruction set");
1231 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1232 x86_prefetch_sse = true;
1234 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1237 ix86_cost = &size_cost;
1239 ix86_cost = processor_target_table[ix86_tune].cost;
1240 target_flags |= processor_target_table[ix86_tune].target_enable;
1241 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1243 /* Arrange to set up i386_stack_locals for all functions. */
1244 init_machine_status = ix86_init_machine_status;
1246 /* Validate -mregparm= value. */
1247 if (ix86_regparm_string)
1249 i = atoi (ix86_regparm_string);
1250 if (i < 0 || i > REGPARM_MAX)
1251 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1257 ix86_regparm = REGPARM_MAX;
1259 /* If the user has provided any of the -malign-* options,
1260 warn and use that value only if -falign-* is not set.
1261 Remove this code in GCC 3.2 or later. */
1262 if (ix86_align_loops_string)
1264 warning ("-malign-loops is obsolete, use -falign-loops");
1265 if (align_loops == 0)
1267 i = atoi (ix86_align_loops_string);
1268 if (i < 0 || i > MAX_CODE_ALIGN)
1269 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1271 align_loops = 1 << i;
1275 if (ix86_align_jumps_string)
1277 warning ("-malign-jumps is obsolete, use -falign-jumps");
1278 if (align_jumps == 0)
1280 i = atoi (ix86_align_jumps_string);
1281 if (i < 0 || i > MAX_CODE_ALIGN)
1282 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1284 align_jumps = 1 << i;
1288 if (ix86_align_funcs_string)
1290 warning ("-malign-functions is obsolete, use -falign-functions");
1291 if (align_functions == 0)
1293 i = atoi (ix86_align_funcs_string);
1294 if (i < 0 || i > MAX_CODE_ALIGN)
1295 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1297 align_functions = 1 << i;
1301 /* Default align_* from the processor table. */
1302 if (align_loops == 0)
1304 align_loops = processor_target_table[ix86_tune].align_loop;
1305 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1307 if (align_jumps == 0)
1309 align_jumps = processor_target_table[ix86_tune].align_jump;
1310 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1312 if (align_functions == 0)
1314 align_functions = processor_target_table[ix86_tune].align_func;
1317 /* Validate -mpreferred-stack-boundary= value, or provide default.
1318 The default of 128 bits is for Pentium III's SSE __m128, but we
1319 don't want additional code to keep the stack aligned when
1320 optimizing for code size. */
1321 ix86_preferred_stack_boundary = (optimize_size
1322 ? TARGET_64BIT ? 128 : 32
1324 if (ix86_preferred_stack_boundary_string)
1326 i = atoi (ix86_preferred_stack_boundary_string);
1327 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1328 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1329 TARGET_64BIT ? 4 : 2);
1331 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1334 /* Validate -mbranch-cost= value, or provide default. */
1335 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1336 if (ix86_branch_cost_string)
1338 i = atoi (ix86_branch_cost_string);
1340 error ("-mbranch-cost=%d is not between 0 and 5", i);
1342 ix86_branch_cost = i;
1345 if (ix86_tls_dialect_string)
1347 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1348 ix86_tls_dialect = TLS_DIALECT_GNU;
1349 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1350 ix86_tls_dialect = TLS_DIALECT_SUN;
1352 error ("bad value (%s) for -mtls-dialect= switch",
1353 ix86_tls_dialect_string);
1356 /* Keep nonleaf frame pointers. */
1357 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1358 flag_omit_frame_pointer = 1;
1360 /* If we're doing fast math, we don't care about comparison order
1361 wrt NaNs. This lets us use a shorter comparison sequence. */
1362 if (flag_unsafe_math_optimizations)
1363 target_flags &= ~MASK_IEEE_FP;
1365 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1366 since the insns won't need emulation. */
1367 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1368 target_flags &= ~MASK_NO_FANCY_MATH_387;
1370 /* Turn on SSE2 builtins for -mpni. */
1372 target_flags |= MASK_SSE2;
1374 /* Turn on SSE builtins for -msse2. */
1376 target_flags |= MASK_SSE;
1380 if (TARGET_ALIGN_DOUBLE)
1381 error ("-malign-double makes no sense in the 64bit mode");
1383 error ("-mrtd calling convention not supported in the 64bit mode");
1384 /* Enable by default the SSE and MMX builtins. */
1385 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1386 ix86_fpmath = FPMATH_SSE;
1390 ix86_fpmath = FPMATH_387;
1391 /* i386 ABI does not specify red zone. It still makes sense to use it
1392 when programmer takes care to stack from being destroyed. */
1393 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1394 target_flags |= MASK_NO_RED_ZONE;
1397 if (ix86_fpmath_string != 0)
1399 if (! strcmp (ix86_fpmath_string, "387"))
1400 ix86_fpmath = FPMATH_387;
1401 else if (! strcmp (ix86_fpmath_string, "sse"))
1405 warning ("SSE instruction set disabled, using 387 arithmetics");
1406 ix86_fpmath = FPMATH_387;
1409 ix86_fpmath = FPMATH_SSE;
1411 else if (! strcmp (ix86_fpmath_string, "387,sse")
1412 || ! strcmp (ix86_fpmath_string, "sse,387"))
1416 warning ("SSE instruction set disabled, using 387 arithmetics");
1417 ix86_fpmath = FPMATH_387;
1419 else if (!TARGET_80387)
1421 warning ("387 instruction set disabled, using SSE arithmetics");
1422 ix86_fpmath = FPMATH_SSE;
1425 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1428 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1431 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1435 target_flags |= MASK_MMX;
1436 x86_prefetch_sse = true;
1439 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1442 target_flags |= MASK_MMX;
1443 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1444 extensions it adds. */
1445 if (x86_3dnow_a & (1 << ix86_arch))
1446 target_flags |= MASK_3DNOW_A;
1448 if ((x86_accumulate_outgoing_args & TUNEMASK)
1449 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1451 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1453 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1456 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1457 p = strchr (internal_label_prefix, 'X');
1458 internal_label_prefix_len = p - internal_label_prefix;
1464 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1466 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1467 make the problem with not enough registers even worse. */
1468 #ifdef INSN_SCHEDULING
1470 flag_schedule_insns = 0;
1473 /* The default values of these switches depend on the TARGET_64BIT
1474 that is not known at this moment. Mark these values with 2 and
1475 let user the to override these. In case there is no command line option
1476 specifying them, we will set the defaults in override_options. */
1478 flag_omit_frame_pointer = 2;
1479 flag_pcc_struct_return = 2;
1480 flag_asynchronous_unwind_tables = 2;
1483 /* Table of valid machine attributes. */
1484 const struct attribute_spec ix86_attribute_table[] =
1486 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1487 /* Stdcall attribute says callee is responsible for popping arguments
1488 if they are not variable. */
1489 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1490 /* Fastcall attribute says callee is responsible for popping arguments
1491 if they are not variable. */
1492 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1493 /* Cdecl attribute says the callee is a normal C declaration */
1494 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1495 /* Regparm attribute specifies how many integer arguments are to be
1496 passed in registers. */
1497 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1498 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1499 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1500 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1501 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1503 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1504 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1505 { NULL, 0, 0, false, false, false, NULL }
1508 /* Decide whether we can make a sibling call to a function. DECL is the
1509 declaration of the function being targeted by the call and EXP is the
1510 CALL_EXPR representing the call. */
1513 ix86_function_ok_for_sibcall (tree decl, tree exp)
1515 /* If we are generating position-independent code, we cannot sibcall
1516 optimize any indirect call, or a direct call to a global function,
1517 as the PLT requires %ebx be live. */
1518 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1521 /* If we are returning floats on the 80387 register stack, we cannot
1522 make a sibcall from a function that doesn't return a float to a
1523 function that does or, conversely, from a function that does return
1524 a float to a function that doesn't; the necessary stack adjustment
1525 would not be executed. */
1526 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1527 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1530 /* If this call is indirect, we'll need to be able to use a call-clobbered
1531 register for the address of the target function. Make sure that all
1532 such registers are not used for passing parameters. */
1533 if (!decl && !TARGET_64BIT)
1535 int regparm = ix86_regparm;
1538 /* We're looking at the CALL_EXPR, we need the type of the function. */
1539 type = TREE_OPERAND (exp, 0); /* pointer expression */
1540 type = TREE_TYPE (type); /* pointer type */
1541 type = TREE_TYPE (type); /* function type */
1543 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1545 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1549 /* ??? Need to count the actual number of registers to be used,
1550 not the possible number of registers. Fix later. */
1555 /* Otherwise okay. That also includes certain types of indirect calls. */
1559 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1560 arguments as in struct attribute_spec.handler. */
1562 ix86_handle_cdecl_attribute (tree *node, tree name,
1563 tree args ATTRIBUTE_UNUSED,
1564 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1566 if (TREE_CODE (*node) != FUNCTION_TYPE
1567 && TREE_CODE (*node) != METHOD_TYPE
1568 && TREE_CODE (*node) != FIELD_DECL
1569 && TREE_CODE (*node) != TYPE_DECL)
1571 warning ("`%s' attribute only applies to functions",
1572 IDENTIFIER_POINTER (name));
1573 *no_add_attrs = true;
1577 if (is_attribute_p ("fastcall", name))
1579 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1581 error ("fastcall and stdcall attributes are not compatible");
1583 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1585 error ("fastcall and regparm attributes are not compatible");
1588 else if (is_attribute_p ("stdcall", name))
1590 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1592 error ("fastcall and stdcall attributes are not compatible");
1599 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1600 *no_add_attrs = true;
1606 /* Handle a "regparm" attribute;
1607 arguments as in struct attribute_spec.handler. */
1609 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1610 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1612 if (TREE_CODE (*node) != FUNCTION_TYPE
1613 && TREE_CODE (*node) != METHOD_TYPE
1614 && TREE_CODE (*node) != FIELD_DECL
1615 && TREE_CODE (*node) != TYPE_DECL)
1617 warning ("`%s' attribute only applies to functions",
1618 IDENTIFIER_POINTER (name));
1619 *no_add_attrs = true;
1625 cst = TREE_VALUE (args);
1626 if (TREE_CODE (cst) != INTEGER_CST)
1628 warning ("`%s' attribute requires an integer constant argument",
1629 IDENTIFIER_POINTER (name));
1630 *no_add_attrs = true;
1632 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1634 warning ("argument to `%s' attribute larger than %d",
1635 IDENTIFIER_POINTER (name), REGPARM_MAX);
1636 *no_add_attrs = true;
1639 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1641 error ("fastcall and regparm attributes are not compatible");
1648 /* Return 0 if the attributes for two types are incompatible, 1 if they
1649 are compatible, and 2 if they are nearly compatible (which causes a
1650 warning to be generated). */
1653 ix86_comp_type_attributes (tree type1, tree type2)
1655 /* Check for mismatch of non-default calling convention. */
1656 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1658 if (TREE_CODE (type1) != FUNCTION_TYPE)
1661 /* Check for mismatched fastcall types */
1662 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1663 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1666 /* Check for mismatched return types (cdecl vs stdcall). */
1667 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1668 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1673 /* Return the regparm value for a fuctio with the indicated TYPE. */
1676 ix86_fntype_regparm (tree type)
1680 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1682 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1684 return ix86_regparm;
1687 /* Value is the number of bytes of arguments automatically
1688 popped when returning from a subroutine call.
1689 FUNDECL is the declaration node of the function (as a tree),
1690 FUNTYPE is the data type of the function (as a tree),
1691 or for a library call it is an identifier node for the subroutine name.
1692 SIZE is the number of bytes of arguments passed on the stack.
1694 On the 80386, the RTD insn may be used to pop them if the number
1695 of args is fixed, but if the number is variable then the caller
1696 must pop them all. RTD can't be used for library calls now
1697 because the library is compiled with the Unix compiler.
1698 Use of RTD is a selectable option, since it is incompatible with
1699 standard Unix calling sequences. If the option is not selected,
1700 the caller must always pop the args.
1702 The attribute stdcall is equivalent to RTD on a per module basis. */
1705 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1707 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1709 /* Cdecl functions override -mrtd, and never pop the stack. */
1710 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1712 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1713 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1714 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1718 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1719 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1720 == void_type_node)))
1724 /* Lose any fake structure return argument if it is passed on the stack. */
1725 if (aggregate_value_p (TREE_TYPE (funtype))
1728 int nregs = ix86_fntype_regparm (funtype);
1731 return GET_MODE_SIZE (Pmode);
1737 /* Argument support functions. */
1739 /* Return true when register may be used to pass function parameters. */
1741 ix86_function_arg_regno_p (int regno)
1745 return (regno < REGPARM_MAX
1746 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1747 if (SSE_REGNO_P (regno) && TARGET_SSE)
1749 /* RAX is used as hidden argument to va_arg functions. */
1752 for (i = 0; i < REGPARM_MAX; i++)
1753 if (regno == x86_64_int_parameter_registers[i])
1758 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1759 for a call to a function whose data type is FNTYPE.
1760 For a library call, FNTYPE is 0. */
1763 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1764 tree fntype, /* tree ptr for function decl */
1765 rtx libname, /* SYMBOL_REF of library name or 0 */
1768 static CUMULATIVE_ARGS zero_cum;
1769 tree param, next_param;
1770 bool user_convention = false;
1772 if (TARGET_DEBUG_ARG)
1774 fprintf (stderr, "\ninit_cumulative_args (");
1776 fprintf (stderr, "fntype code = %s, ret code = %s",
1777 tree_code_name[(int) TREE_CODE (fntype)],
1778 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1780 fprintf (stderr, "no fntype");
1783 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1788 /* Set up the number of registers to use for passing arguments. */
1789 cum->nregs = ix86_regparm;
1790 cum->sse_nregs = SSE_REGPARM_MAX;
1791 if (fntype && !TARGET_64BIT)
1793 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1797 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1798 user_convention = true;
1801 cum->maybe_vaarg = false;
1803 /* Use ecx and edx registers if function has fastcall attribute */
1804 if (fntype && !TARGET_64BIT)
1806 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1810 user_convention = true;
1814 /* Use register calling convention for local functions when possible. */
1815 if (!TARGET_64BIT && !user_convention && fndecl
1816 && flag_unit_at_a_time)
1818 struct cgraph_local_info *i = cgraph_local_info (fndecl);
1821 /* We can't use regparm(3) for nested functions as these use
1822 static chain pointer in third argument. */
1823 if (DECL_CONTEXT (fndecl) && !DECL_NO_STATIC_CHAIN (fndecl))
1831 /* Determine if this function has variable arguments. This is
1832 indicated by the last argument being 'void_type_mode' if there
1833 are no variable arguments. If there are variable arguments, then
1834 we won't pass anything in registers */
1838 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1839 param != 0; param = next_param)
1841 next_param = TREE_CHAIN (param);
1842 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1849 cum->maybe_vaarg = true;
1853 if ((!fntype && !libname)
1854 || (fntype && !TYPE_ARG_TYPES (fntype)))
1855 cum->maybe_vaarg = 1;
1857 if (TARGET_DEBUG_ARG)
1858 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1863 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1864 of this code is to classify each 8bytes of incoming argument by the register
1865 class and assign registers accordingly. */
1867 /* Return the union class of CLASS1 and CLASS2.
1868 See the x86-64 PS ABI for details. */
1870 static enum x86_64_reg_class
1871 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1873 /* Rule #1: If both classes are equal, this is the resulting class. */
1874 if (class1 == class2)
1877 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1879 if (class1 == X86_64_NO_CLASS)
1881 if (class2 == X86_64_NO_CLASS)
1884 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1885 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1886 return X86_64_MEMORY_CLASS;
1888 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1889 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1890 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1891 return X86_64_INTEGERSI_CLASS;
1892 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1893 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1894 return X86_64_INTEGER_CLASS;
1896 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1897 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1898 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1899 return X86_64_MEMORY_CLASS;
1901 /* Rule #6: Otherwise class SSE is used. */
1902 return X86_64_SSE_CLASS;
1905 /* Classify the argument of type TYPE and mode MODE.
1906 CLASSES will be filled by the register class used to pass each word
1907 of the operand. The number of words is returned. In case the parameter
1908 should be passed in memory, 0 is returned. As a special case for zero
1909 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1911 BIT_OFFSET is used internally for handling records and specifies offset
1912 of the offset in bits modulo 256 to avoid overflow cases.
1914 See the x86-64 PS ABI for details.
1918 classify_argument (enum machine_mode mode, tree type,
1919 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1922 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1923 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1925 /* Variable sized entities are always passed/returned in memory. */
1929 if (mode != VOIDmode
1930 && MUST_PASS_IN_STACK (mode, type))
1933 if (type && AGGREGATE_TYPE_P (type))
1937 enum x86_64_reg_class subclasses[MAX_CLASSES];
1939 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1943 for (i = 0; i < words; i++)
1944 classes[i] = X86_64_NO_CLASS;
1946 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1947 signalize memory class, so handle it as special case. */
1950 classes[0] = X86_64_NO_CLASS;
1954 /* Classify each field of record and merge classes. */
1955 if (TREE_CODE (type) == RECORD_TYPE)
1957 /* For classes first merge in the field of the subclasses. */
1958 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1960 tree bases = TYPE_BINFO_BASETYPES (type);
1961 int n_bases = TREE_VEC_LENGTH (bases);
1964 for (i = 0; i < n_bases; ++i)
1966 tree binfo = TREE_VEC_ELT (bases, i);
1968 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1969 tree type = BINFO_TYPE (binfo);
1971 num = classify_argument (TYPE_MODE (type),
1973 (offset + bit_offset) % 256);
1976 for (i = 0; i < num; i++)
1978 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1980 merge_classes (subclasses[i], classes[i + pos]);
1984 /* And now merge the fields of structure. */
1985 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1987 if (TREE_CODE (field) == FIELD_DECL)
1991 /* Bitfields are always classified as integer. Handle them
1992 early, since later code would consider them to be
1993 misaligned integers. */
1994 if (DECL_BIT_FIELD (field))
1996 for (i = int_bit_position (field) / 8 / 8;
1997 i < (int_bit_position (field)
1998 + tree_low_cst (DECL_SIZE (field), 0)
2001 merge_classes (X86_64_INTEGER_CLASS,
2006 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2007 TREE_TYPE (field), subclasses,
2008 (int_bit_position (field)
2009 + bit_offset) % 256);
2012 for (i = 0; i < num; i++)
2015 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2017 merge_classes (subclasses[i], classes[i + pos]);
2023 /* Arrays are handled as small records. */
2024 else if (TREE_CODE (type) == ARRAY_TYPE)
2027 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2028 TREE_TYPE (type), subclasses, bit_offset);
2032 /* The partial classes are now full classes. */
2033 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2034 subclasses[0] = X86_64_SSE_CLASS;
2035 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2036 subclasses[0] = X86_64_INTEGER_CLASS;
2038 for (i = 0; i < words; i++)
2039 classes[i] = subclasses[i % num];
2041 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2042 else if (TREE_CODE (type) == UNION_TYPE
2043 || TREE_CODE (type) == QUAL_UNION_TYPE)
2045 /* For classes first merge in the field of the subclasses. */
2046 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2048 tree bases = TYPE_BINFO_BASETYPES (type);
2049 int n_bases = TREE_VEC_LENGTH (bases);
2052 for (i = 0; i < n_bases; ++i)
2054 tree binfo = TREE_VEC_ELT (bases, i);
2056 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2057 tree type = BINFO_TYPE (binfo);
2059 num = classify_argument (TYPE_MODE (type),
2061 (offset + (bit_offset % 64)) % 256);
2064 for (i = 0; i < num; i++)
2066 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2068 merge_classes (subclasses[i], classes[i + pos]);
2072 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2074 if (TREE_CODE (field) == FIELD_DECL)
2077 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2078 TREE_TYPE (field), subclasses,
2082 for (i = 0; i < num; i++)
2083 classes[i] = merge_classes (subclasses[i], classes[i]);
2090 /* Final merger cleanup. */
2091 for (i = 0; i < words; i++)
2093 /* If one class is MEMORY, everything should be passed in
2095 if (classes[i] == X86_64_MEMORY_CLASS)
2098 /* The X86_64_SSEUP_CLASS should be always preceded by
2099 X86_64_SSE_CLASS. */
2100 if (classes[i] == X86_64_SSEUP_CLASS
2101 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2102 classes[i] = X86_64_SSE_CLASS;
2104 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2105 if (classes[i] == X86_64_X87UP_CLASS
2106 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2107 classes[i] = X86_64_SSE_CLASS;
2112 /* Compute alignment needed. We align all types to natural boundaries with
2113 exception of XFmode that is aligned to 64bits. */
2114 if (mode != VOIDmode && mode != BLKmode)
2116 int mode_alignment = GET_MODE_BITSIZE (mode);
2119 mode_alignment = 128;
2120 else if (mode == XCmode)
2121 mode_alignment = 256;
2122 /* Misaligned fields are always returned in memory. */
2123 if (bit_offset % mode_alignment)
2127 /* Classification of atomic types. */
2137 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2138 classes[0] = X86_64_INTEGERSI_CLASS;
2140 classes[0] = X86_64_INTEGER_CLASS;
2144 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2147 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2148 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2151 if (!(bit_offset % 64))
2152 classes[0] = X86_64_SSESF_CLASS;
2154 classes[0] = X86_64_SSE_CLASS;
2157 classes[0] = X86_64_SSEDF_CLASS;
2160 classes[0] = X86_64_X87_CLASS;
2161 classes[1] = X86_64_X87UP_CLASS;
2164 classes[0] = X86_64_X87_CLASS;
2165 classes[1] = X86_64_X87UP_CLASS;
2166 classes[2] = X86_64_X87_CLASS;
2167 classes[3] = X86_64_X87UP_CLASS;
2170 classes[0] = X86_64_SSEDF_CLASS;
2171 classes[1] = X86_64_SSEDF_CLASS;
2174 classes[0] = X86_64_SSE_CLASS;
2182 classes[0] = X86_64_SSE_CLASS;
2183 classes[1] = X86_64_SSEUP_CLASS;
2198 /* Examine the argument and return set number of register required in each
2199 class. Return 0 iff parameter should be passed in memory. */
2201 examine_argument (enum machine_mode mode, tree type, int in_return,
2202 int *int_nregs, int *sse_nregs)
2204 enum x86_64_reg_class class[MAX_CLASSES];
2205 int n = classify_argument (mode, type, class, 0);
2211 for (n--; n >= 0; n--)
2214 case X86_64_INTEGER_CLASS:
2215 case X86_64_INTEGERSI_CLASS:
2218 case X86_64_SSE_CLASS:
2219 case X86_64_SSESF_CLASS:
2220 case X86_64_SSEDF_CLASS:
2223 case X86_64_NO_CLASS:
2224 case X86_64_SSEUP_CLASS:
2226 case X86_64_X87_CLASS:
2227 case X86_64_X87UP_CLASS:
2231 case X86_64_MEMORY_CLASS:
2236 /* Construct container for the argument used by GCC interface. See
2237 FUNCTION_ARG for the detailed description. */
2239 construct_container (enum machine_mode mode, tree type, int in_return,
2240 int nintregs, int nsseregs, const int * intreg,
2243 enum machine_mode tmpmode;
2245 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2246 enum x86_64_reg_class class[MAX_CLASSES];
2250 int needed_sseregs, needed_intregs;
2251 rtx exp[MAX_CLASSES];
2254 n = classify_argument (mode, type, class, 0);
2255 if (TARGET_DEBUG_ARG)
2258 fprintf (stderr, "Memory class\n");
2261 fprintf (stderr, "Classes:");
2262 for (i = 0; i < n; i++)
2264 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2266 fprintf (stderr, "\n");
2271 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2273 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2276 /* First construct simple cases. Avoid SCmode, since we want to use
2277 single register to pass this type. */
2278 if (n == 1 && mode != SCmode)
2281 case X86_64_INTEGER_CLASS:
2282 case X86_64_INTEGERSI_CLASS:
2283 return gen_rtx_REG (mode, intreg[0]);
2284 case X86_64_SSE_CLASS:
2285 case X86_64_SSESF_CLASS:
2286 case X86_64_SSEDF_CLASS:
2287 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2288 case X86_64_X87_CLASS:
2289 return gen_rtx_REG (mode, FIRST_STACK_REG);
2290 case X86_64_NO_CLASS:
2291 /* Zero sized array, struct or class. */
2296 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2297 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2299 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2300 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2301 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2302 && class[1] == X86_64_INTEGER_CLASS
2303 && (mode == CDImode || mode == TImode)
2304 && intreg[0] + 1 == intreg[1])
2305 return gen_rtx_REG (mode, intreg[0]);
2307 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2308 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2309 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2311 /* Otherwise figure out the entries of the PARALLEL. */
2312 for (i = 0; i < n; i++)
2316 case X86_64_NO_CLASS:
2318 case X86_64_INTEGER_CLASS:
2319 case X86_64_INTEGERSI_CLASS:
2320 /* Merge TImodes on aligned occasions here too. */
2321 if (i * 8 + 8 > bytes)
2322 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2323 else if (class[i] == X86_64_INTEGERSI_CLASS)
2327 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2328 if (tmpmode == BLKmode)
2330 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2331 gen_rtx_REG (tmpmode, *intreg),
2335 case X86_64_SSESF_CLASS:
2336 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2337 gen_rtx_REG (SFmode,
2338 SSE_REGNO (sse_regno)),
2342 case X86_64_SSEDF_CLASS:
2343 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2344 gen_rtx_REG (DFmode,
2345 SSE_REGNO (sse_regno)),
2349 case X86_64_SSE_CLASS:
2350 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2354 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2355 gen_rtx_REG (tmpmode,
2356 SSE_REGNO (sse_regno)),
2358 if (tmpmode == TImode)
2366 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2367 for (i = 0; i < nexps; i++)
2368 XVECEXP (ret, 0, i) = exp [i];
2372 /* Update the data in CUM to advance over an argument
2373 of mode MODE and data type TYPE.
2374 (TYPE is null for libcalls where that information may not be available.) */
2377 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2378 enum machine_mode mode, /* current arg mode */
2379 tree type, /* type of the argument or 0 if lib support */
2380 int named) /* whether or not the argument was named */
2383 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2384 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2386 if (TARGET_DEBUG_ARG)
2388 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2389 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2392 int int_nregs, sse_nregs;
2393 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2394 cum->words += words;
2395 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2397 cum->nregs -= int_nregs;
2398 cum->sse_nregs -= sse_nregs;
2399 cum->regno += int_nregs;
2400 cum->sse_regno += sse_nregs;
2403 cum->words += words;
2407 if (TARGET_SSE && mode == TImode)
2409 cum->sse_words += words;
2410 cum->sse_nregs -= 1;
2411 cum->sse_regno += 1;
2412 if (cum->sse_nregs <= 0)
2420 cum->words += words;
2421 cum->nregs -= words;
2422 cum->regno += words;
2424 if (cum->nregs <= 0)
2434 /* Define where to put the arguments to a function.
2435 Value is zero to push the argument on the stack,
2436 or a hard register in which to store the argument.
2438 MODE is the argument's machine mode.
2439 TYPE is the data type of the argument (as a tree).
2440 This is null for libcalls where that information may
2442 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2443 the preceding args and about the function being called.
2444 NAMED is nonzero if this argument is a named parameter
2445 (otherwise it is an extra parameter matching an ellipsis). */
2448 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2449 enum machine_mode mode, /* current arg mode */
2450 tree type, /* type of the argument or 0 if lib support */
2451 int named) /* != 0 for normal args, == 0 for ... args */
2455 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2456 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2458 /* Handle a hidden AL argument containing number of registers for varargs
2459 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2461 if (mode == VOIDmode)
2464 return GEN_INT (cum->maybe_vaarg
2465 ? (cum->sse_nregs < 0
2473 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2474 &x86_64_int_parameter_registers [cum->regno],
2479 /* For now, pass fp/complex values on the stack. */
2491 if (words <= cum->nregs)
2493 int regno = cum->regno;
2495 /* Fastcall allocates the first two DWORD (SImode) or
2496 smaller arguments to ECX and EDX. */
2499 if (mode == BLKmode || mode == DImode)
2502 /* ECX not EAX is the first allocated register. */
2506 ret = gen_rtx_REG (mode, regno);
2511 ret = gen_rtx_REG (mode, cum->sse_regno);
2515 if (TARGET_DEBUG_ARG)
2518 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2519 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2522 print_simple_rtl (stderr, ret);
2524 fprintf (stderr, ", stack");
2526 fprintf (stderr, " )\n");
2532 /* A C expression that indicates when an argument must be passed by
2533 reference. If nonzero for an argument, a copy of that argument is
2534 made in memory and a pointer to the argument is passed instead of
2535 the argument itself. The pointer is passed in whatever way is
2536 appropriate for passing a pointer to that type. */
2539 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2540 enum machine_mode mode ATTRIBUTE_UNUSED,
2541 tree type, int named ATTRIBUTE_UNUSED)
2546 if (type && int_size_in_bytes (type) == -1)
2548 if (TARGET_DEBUG_ARG)
2549 fprintf (stderr, "function_arg_pass_by_reference\n");
2556 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2559 contains_128bit_aligned_vector_p (tree type)
2561 enum machine_mode mode = TYPE_MODE (type);
2562 if (SSE_REG_MODE_P (mode)
2563 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2565 if (TYPE_ALIGN (type) < 128)
2568 if (AGGREGATE_TYPE_P (type))
2570 /* Walk the aggregates recursively. */
2571 if (TREE_CODE (type) == RECORD_TYPE
2572 || TREE_CODE (type) == UNION_TYPE
2573 || TREE_CODE (type) == QUAL_UNION_TYPE)
2577 if (TYPE_BINFO (type) != NULL
2578 && TYPE_BINFO_BASETYPES (type) != NULL)
2580 tree bases = TYPE_BINFO_BASETYPES (type);
2581 int n_bases = TREE_VEC_LENGTH (bases);
2584 for (i = 0; i < n_bases; ++i)
2586 tree binfo = TREE_VEC_ELT (bases, i);
2587 tree type = BINFO_TYPE (binfo);
2589 if (contains_128bit_aligned_vector_p (type))
2593 /* And now merge the fields of structure. */
2594 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2596 if (TREE_CODE (field) == FIELD_DECL
2597 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2601 /* Just for use if some languages passes arrays by value. */
2602 else if (TREE_CODE (type) == ARRAY_TYPE)
2604 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2613 /* Gives the alignment boundary, in bits, of an argument with the
2614 specified mode and type. */
2617 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2621 align = TYPE_ALIGN (type);
2623 align = GET_MODE_ALIGNMENT (mode);
2624 if (align < PARM_BOUNDARY)
2625 align = PARM_BOUNDARY;
2628 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2629 make an exception for SSE modes since these require 128bit
2632 The handling here differs from field_alignment. ICC aligns MMX
2633 arguments to 4 byte boundaries, while structure fields are aligned
2634 to 8 byte boundaries. */
2637 if (!SSE_REG_MODE_P (mode))
2638 align = PARM_BOUNDARY;
2642 if (!contains_128bit_aligned_vector_p (type))
2643 align = PARM_BOUNDARY;
2651 /* Return true if N is a possible register number of function value. */
2653 ix86_function_value_regno_p (int regno)
2657 return ((regno) == 0
2658 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2659 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2661 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2662 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2663 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2666 /* Define how to find the value returned by a function.
2667 VALTYPE is the data type of the value (as a tree).
2668 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2669 otherwise, FUNC is 0. */
2671 ix86_function_value (tree valtype)
2675 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2676 REGPARM_MAX, SSE_REGPARM_MAX,
2677 x86_64_int_return_registers, 0);
2678 /* For zero sized structures, construct_container return NULL, but we need
2679 to keep rest of compiler happy by returning meaningful value. */
2681 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2685 return gen_rtx_REG (TYPE_MODE (valtype),
2686 ix86_value_regno (TYPE_MODE (valtype)));
2689 /* Return false iff type is returned in memory. */
2691 ix86_return_in_memory (tree type)
2693 int needed_intregs, needed_sseregs;
2696 return !examine_argument (TYPE_MODE (type), type, 1,
2697 &needed_intregs, &needed_sseregs);
2701 if (TYPE_MODE (type) == BLKmode)
2703 else if (MS_AGGREGATE_RETURN
2704 && AGGREGATE_TYPE_P (type)
2705 && int_size_in_bytes(type) <= 8)
2707 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2708 && int_size_in_bytes (type) == 8)
2709 || (int_size_in_bytes (type) > 12
2710 && TYPE_MODE (type) != TImode
2711 && TYPE_MODE (type) != TFmode
2712 && !VECTOR_MODE_P (TYPE_MODE (type))))
2718 /* Define how to find the value returned by a library function
2719 assuming the value has mode MODE. */
2721 ix86_libcall_value (enum machine_mode mode)
2731 return gen_rtx_REG (mode, FIRST_SSE_REG);
2734 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2736 return gen_rtx_REG (mode, 0);
2740 return gen_rtx_REG (mode, ix86_value_regno (mode));
2743 /* Given a mode, return the register to use for a return value. */
2746 ix86_value_regno (enum machine_mode mode)
2748 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2749 return FIRST_FLOAT_REG;
2750 if (mode == TImode || VECTOR_MODE_P (mode))
2751 return FIRST_SSE_REG;
2755 /* Create the va_list data type. */
2758 ix86_build_va_list (void)
2760 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2762 /* For i386 we use plain pointer to argument area. */
2764 return build_pointer_type (char_type_node);
2766 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2767 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2769 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2770 unsigned_type_node);
2771 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2772 unsigned_type_node);
2773 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2775 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2778 DECL_FIELD_CONTEXT (f_gpr) = record;
2779 DECL_FIELD_CONTEXT (f_fpr) = record;
2780 DECL_FIELD_CONTEXT (f_ovf) = record;
2781 DECL_FIELD_CONTEXT (f_sav) = record;
2783 TREE_CHAIN (record) = type_decl;
2784 TYPE_NAME (record) = type_decl;
2785 TYPE_FIELDS (record) = f_gpr;
2786 TREE_CHAIN (f_gpr) = f_fpr;
2787 TREE_CHAIN (f_fpr) = f_ovf;
2788 TREE_CHAIN (f_ovf) = f_sav;
2790 layout_type (record);
2792 /* The correct type is an array type of one element. */
2793 return build_array_type (record, build_index_type (size_zero_node));
2796 /* Perform any needed actions needed for a function that is receiving a
2797 variable number of arguments.
2801 MODE and TYPE are the mode and type of the current parameter.
2803 PRETEND_SIZE is a variable that should be set to the amount of stack
2804 that must be pushed by the prolog to pretend that our caller pushed
2807 Normally, this macro will push all remaining incoming registers on the
2808 stack and set PRETEND_SIZE to the length of the registers pushed. */
2811 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2812 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2815 CUMULATIVE_ARGS next_cum;
2816 rtx save_area = NULL_RTX, mem;
2829 /* Indicate to allocate space on the stack for varargs save area. */
2830 ix86_save_varrargs_registers = 1;
2832 cfun->stack_alignment_needed = 128;
2834 fntype = TREE_TYPE (current_function_decl);
2835 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2836 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2837 != void_type_node));
2839 /* For varargs, we do not want to skip the dummy va_dcl argument.
2840 For stdargs, we do want to skip the last named argument. */
2843 function_arg_advance (&next_cum, mode, type, 1);
2846 save_area = frame_pointer_rtx;
2848 set = get_varargs_alias_set ();
2850 for (i = next_cum.regno; i < ix86_regparm; i++)
2852 mem = gen_rtx_MEM (Pmode,
2853 plus_constant (save_area, i * UNITS_PER_WORD));
2854 set_mem_alias_set (mem, set);
2855 emit_move_insn (mem, gen_rtx_REG (Pmode,
2856 x86_64_int_parameter_registers[i]));
2859 if (next_cum.sse_nregs)
2861 /* Now emit code to save SSE registers. The AX parameter contains number
2862 of SSE parameter registers used to call this function. We use
2863 sse_prologue_save insn template that produces computed jump across
2864 SSE saves. We need some preparation work to get this working. */
2866 label = gen_label_rtx ();
2867 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2869 /* Compute address to jump to :
2870 label - 5*eax + nnamed_sse_arguments*5 */
2871 tmp_reg = gen_reg_rtx (Pmode);
2872 nsse_reg = gen_reg_rtx (Pmode);
2873 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2874 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2875 gen_rtx_MULT (Pmode, nsse_reg,
2877 if (next_cum.sse_regno)
2880 gen_rtx_CONST (DImode,
2881 gen_rtx_PLUS (DImode,
2883 GEN_INT (next_cum.sse_regno * 4))));
2885 emit_move_insn (nsse_reg, label_ref);
2886 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2888 /* Compute address of memory block we save into. We always use pointer
2889 pointing 127 bytes after first byte to store - this is needed to keep
2890 instruction size limited by 4 bytes. */
2891 tmp_reg = gen_reg_rtx (Pmode);
2892 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2893 plus_constant (save_area,
2894 8 * REGPARM_MAX + 127)));
2895 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2896 set_mem_alias_set (mem, set);
2897 set_mem_align (mem, BITS_PER_WORD);
2899 /* And finally do the dirty job! */
2900 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2901 GEN_INT (next_cum.sse_regno), label));
2906 /* Implement va_start. */
2909 ix86_va_start (tree valist, rtx nextarg)
2911 HOST_WIDE_INT words, n_gpr, n_fpr;
2912 tree f_gpr, f_fpr, f_ovf, f_sav;
2913 tree gpr, fpr, ovf, sav, t;
2915 /* Only 64bit target needs something special. */
2918 std_expand_builtin_va_start (valist, nextarg);
2922 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2923 f_fpr = TREE_CHAIN (f_gpr);
2924 f_ovf = TREE_CHAIN (f_fpr);
2925 f_sav = TREE_CHAIN (f_ovf);
2927 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2928 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2929 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2930 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2931 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2933 /* Count number of gp and fp argument registers used. */
2934 words = current_function_args_info.words;
2935 n_gpr = current_function_args_info.regno;
2936 n_fpr = current_function_args_info.sse_regno;
2938 if (TARGET_DEBUG_ARG)
2939 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2940 (int) words, (int) n_gpr, (int) n_fpr);
2942 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2943 build_int_2 (n_gpr * 8, 0));
2944 TREE_SIDE_EFFECTS (t) = 1;
2945 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2947 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2948 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2949 TREE_SIDE_EFFECTS (t) = 1;
2950 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2952 /* Find the overflow area. */
2953 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2955 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2956 build_int_2 (words * UNITS_PER_WORD, 0));
2957 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2958 TREE_SIDE_EFFECTS (t) = 1;
2959 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2961 /* Find the register save area.
2962 Prologue of the function save it right above stack frame. */
2963 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2964 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2965 TREE_SIDE_EFFECTS (t) = 1;
2966 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2969 /* Implement va_arg. */
2971 ix86_va_arg (tree valist, tree type)
2973 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2974 tree f_gpr, f_fpr, f_ovf, f_sav;
2975 tree gpr, fpr, ovf, sav, t;
2977 rtx lab_false, lab_over = NULL_RTX;
2982 /* Only 64bit target needs something special. */
2985 return std_expand_builtin_va_arg (valist, type);
2988 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2989 f_fpr = TREE_CHAIN (f_gpr);
2990 f_ovf = TREE_CHAIN (f_fpr);
2991 f_sav = TREE_CHAIN (f_ovf);
2993 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2994 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2995 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2996 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2997 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2999 size = int_size_in_bytes (type);
3002 /* Passed by reference. */
3004 type = build_pointer_type (type);
3005 size = int_size_in_bytes (type);
3007 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3009 container = construct_container (TYPE_MODE (type), type, 0,
3010 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3012 * Pull the value out of the saved registers ...
3015 addr_rtx = gen_reg_rtx (Pmode);
3019 rtx int_addr_rtx, sse_addr_rtx;
3020 int needed_intregs, needed_sseregs;
3023 lab_over = gen_label_rtx ();
3024 lab_false = gen_label_rtx ();
3026 examine_argument (TYPE_MODE (type), type, 0,
3027 &needed_intregs, &needed_sseregs);
3030 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3031 || TYPE_ALIGN (type) > 128);
3033 /* In case we are passing structure, verify that it is consecutive block
3034 on the register save area. If not we need to do moves. */
3035 if (!need_temp && !REG_P (container))
3037 /* Verify that all registers are strictly consecutive */
3038 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3042 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3044 rtx slot = XVECEXP (container, 0, i);
3045 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3046 || INTVAL (XEXP (slot, 1)) != i * 16)
3054 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3056 rtx slot = XVECEXP (container, 0, i);
3057 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3058 || INTVAL (XEXP (slot, 1)) != i * 8)
3065 int_addr_rtx = addr_rtx;
3066 sse_addr_rtx = addr_rtx;
3070 int_addr_rtx = gen_reg_rtx (Pmode);
3071 sse_addr_rtx = gen_reg_rtx (Pmode);
3073 /* First ensure that we fit completely in registers. */
3076 emit_cmp_and_jump_insns (expand_expr
3077 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3078 GEN_INT ((REGPARM_MAX - needed_intregs +
3079 1) * 8), GE, const1_rtx, SImode,
3084 emit_cmp_and_jump_insns (expand_expr
3085 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3086 GEN_INT ((SSE_REGPARM_MAX -
3087 needed_sseregs + 1) * 16 +
3088 REGPARM_MAX * 8), GE, const1_rtx,
3089 SImode, 1, lab_false);
3092 /* Compute index to start of area used for integer regs. */
3095 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3096 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3097 if (r != int_addr_rtx)
3098 emit_move_insn (int_addr_rtx, r);
3102 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3103 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3104 if (r != sse_addr_rtx)
3105 emit_move_insn (sse_addr_rtx, r);
3113 /* Never use the memory itself, as it has the alias set. */
3114 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3115 mem = gen_rtx_MEM (BLKmode, x);
3116 force_operand (x, addr_rtx);
3117 set_mem_alias_set (mem, get_varargs_alias_set ());
3118 set_mem_align (mem, BITS_PER_UNIT);
3120 for (i = 0; i < XVECLEN (container, 0); i++)
3122 rtx slot = XVECEXP (container, 0, i);
3123 rtx reg = XEXP (slot, 0);
3124 enum machine_mode mode = GET_MODE (reg);
3130 if (SSE_REGNO_P (REGNO (reg)))
3132 src_addr = sse_addr_rtx;
3133 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3137 src_addr = int_addr_rtx;
3138 src_offset = REGNO (reg) * 8;
3140 src_mem = gen_rtx_MEM (mode, src_addr);
3141 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3142 src_mem = adjust_address (src_mem, mode, src_offset);
3143 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3144 emit_move_insn (dest_mem, src_mem);
3151 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3152 build_int_2 (needed_intregs * 8, 0));
3153 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3154 TREE_SIDE_EFFECTS (t) = 1;
3155 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3160 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3161 build_int_2 (needed_sseregs * 16, 0));
3162 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3163 TREE_SIDE_EFFECTS (t) = 1;
3164 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3167 emit_jump_insn (gen_jump (lab_over));
3169 emit_label (lab_false);
3172 /* ... otherwise out of the overflow area. */
3174 /* Care for on-stack alignment if needed. */
3175 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3179 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3180 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3181 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3185 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3187 emit_move_insn (addr_rtx, r);
3190 build (PLUS_EXPR, TREE_TYPE (t), t,
3191 build_int_2 (rsize * UNITS_PER_WORD, 0));
3192 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3193 TREE_SIDE_EFFECTS (t) = 1;
3194 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3197 emit_label (lab_over);
3201 r = gen_rtx_MEM (Pmode, addr_rtx);
3202 set_mem_alias_set (r, get_varargs_alias_set ());
3203 emit_move_insn (addr_rtx, r);
3209 /* Return nonzero if OP is either a i387 or SSE fp register. */
3211 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3213 return ANY_FP_REG_P (op);
3216 /* Return nonzero if OP is an i387 fp register. */
3218 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3220 return FP_REG_P (op);
3223 /* Return nonzero if OP is a non-fp register_operand. */
3225 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3227 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3230 /* Return nonzero if OP is a register operand other than an
3231 i387 fp register. */
3233 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3235 return register_operand (op, mode) && !FP_REG_P (op);
3238 /* Return nonzero if OP is general operand representable on x86_64. */
3241 x86_64_general_operand (rtx op, enum machine_mode mode)
3244 return general_operand (op, mode);
3245 if (nonimmediate_operand (op, mode))
3247 return x86_64_sign_extended_value (op);
3250 /* Return nonzero if OP is general operand representable on x86_64
3251 as either sign extended or zero extended constant. */
3254 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3257 return general_operand (op, mode);
3258 if (nonimmediate_operand (op, mode))
3260 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3263 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3266 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3269 return nonmemory_operand (op, mode);
3270 if (register_operand (op, mode))
3272 return x86_64_sign_extended_value (op);
3275 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3278 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3280 if (!TARGET_64BIT || !flag_pic)
3281 return nonmemory_operand (op, mode);
3282 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3284 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3289 /* Return nonzero if OPNUM's MEM should be matched
3290 in movabs* patterns. */
3293 ix86_check_movabs (rtx insn, int opnum)
3297 set = PATTERN (insn);
3298 if (GET_CODE (set) == PARALLEL)
3299 set = XVECEXP (set, 0, 0);
3300 if (GET_CODE (set) != SET)
3302 mem = XEXP (set, opnum);
3303 while (GET_CODE (mem) == SUBREG)
3304 mem = SUBREG_REG (mem);
3305 if (GET_CODE (mem) != MEM)
3307 return (volatile_ok || !MEM_VOLATILE_P (mem));
3310 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3313 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3316 return nonmemory_operand (op, mode);
3317 if (register_operand (op, mode))
3319 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3322 /* Return nonzero if OP is immediate operand representable on x86_64. */
3325 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3328 return immediate_operand (op, mode);
3329 return x86_64_sign_extended_value (op);
3332 /* Return nonzero if OP is immediate operand representable on x86_64. */
3335 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3337 return x86_64_zero_extended_value (op);
3340 /* Return nonzero if OP is (const_int 1), else return zero. */
3343 const_int_1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3345 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3348 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3349 for shift & compare patterns, as shifting by 0 does not change flags),
3350 else return zero. */
3353 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3355 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3358 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3359 reference and a constant. */
3362 symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3364 switch (GET_CODE (op))
3372 if (GET_CODE (op) == SYMBOL_REF
3373 || GET_CODE (op) == LABEL_REF
3374 || (GET_CODE (op) == UNSPEC
3375 && (XINT (op, 1) == UNSPEC_GOT
3376 || XINT (op, 1) == UNSPEC_GOTOFF
3377 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3379 if (GET_CODE (op) != PLUS
3380 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3384 if (GET_CODE (op) == SYMBOL_REF
3385 || GET_CODE (op) == LABEL_REF)
3387 /* Only @GOTOFF gets offsets. */
3388 if (GET_CODE (op) != UNSPEC
3389 || XINT (op, 1) != UNSPEC_GOTOFF)
3392 op = XVECEXP (op, 0, 0);
3393 if (GET_CODE (op) == SYMBOL_REF
3394 || GET_CODE (op) == LABEL_REF)
3403 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3406 pic_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3408 if (GET_CODE (op) != CONST)
3413 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3418 if (GET_CODE (op) == UNSPEC)
3420 if (GET_CODE (op) != PLUS
3421 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3424 if (GET_CODE (op) == UNSPEC)
3430 /* Return true if OP is a symbolic operand that resolves locally. */
3433 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3435 if (GET_CODE (op) == CONST
3436 && GET_CODE (XEXP (op, 0)) == PLUS
3437 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3438 op = XEXP (XEXP (op, 0), 0);
3440 if (GET_CODE (op) == LABEL_REF)
3443 if (GET_CODE (op) != SYMBOL_REF)
3446 if (SYMBOL_REF_LOCAL_P (op))
3449 /* There is, however, a not insubstantial body of code in the rest of
3450 the compiler that assumes it can just stick the results of
3451 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3452 /* ??? This is a hack. Should update the body of the compiler to
3453 always create a DECL an invoke targetm.encode_section_info. */
3454 if (strncmp (XSTR (op, 0), internal_label_prefix,
3455 internal_label_prefix_len) == 0)
3461 /* Test for various thread-local symbols. */
3464 tls_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3466 if (GET_CODE (op) != SYMBOL_REF)
3468 return SYMBOL_REF_TLS_MODEL (op);
3472 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3474 if (GET_CODE (op) != SYMBOL_REF)
3476 return SYMBOL_REF_TLS_MODEL (op) == kind;
3480 global_dynamic_symbolic_operand (register rtx op,
3481 enum machine_mode mode ATTRIBUTE_UNUSED)
3483 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3487 local_dynamic_symbolic_operand (register rtx op,
3488 enum machine_mode mode ATTRIBUTE_UNUSED)
3490 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3494 initial_exec_symbolic_operand (register rtx op,
3495 enum machine_mode mode ATTRIBUTE_UNUSED)
3497 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3501 local_exec_symbolic_operand (register rtx op,
3502 enum machine_mode mode ATTRIBUTE_UNUSED)
3504 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3507 /* Test for a valid operand for a call instruction. Don't allow the
3508 arg pointer register or virtual regs since they may decay into
3509 reg + const, which the patterns can't handle. */
3512 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3514 /* Disallow indirect through a virtual register. This leads to
3515 compiler aborts when trying to eliminate them. */
3516 if (GET_CODE (op) == REG
3517 && (op == arg_pointer_rtx
3518 || op == frame_pointer_rtx
3519 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3520 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3523 /* Disallow `call 1234'. Due to varying assembler lameness this
3524 gets either rejected or translated to `call .+1234'. */
3525 if (GET_CODE (op) == CONST_INT)
3528 /* Explicitly allow SYMBOL_REF even if pic. */
3529 if (GET_CODE (op) == SYMBOL_REF)
3532 /* Otherwise we can allow any general_operand in the address. */
3533 return general_operand (op, Pmode);
3536 /* Test for a valid operand for a call instruction. Don't allow the
3537 arg pointer register or virtual regs since they may decay into
3538 reg + const, which the patterns can't handle. */
3541 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3543 /* Disallow indirect through a virtual register. This leads to
3544 compiler aborts when trying to eliminate them. */
3545 if (GET_CODE (op) == REG
3546 && (op == arg_pointer_rtx
3547 || op == frame_pointer_rtx
3548 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3549 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3552 /* Explicitly allow SYMBOL_REF even if pic. */
3553 if (GET_CODE (op) == SYMBOL_REF)
3556 /* Otherwise we can only allow register operands. */
3557 return register_operand (op, Pmode);
3561 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3563 if (GET_CODE (op) == CONST
3564 && GET_CODE (XEXP (op, 0)) == PLUS
3565 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3566 op = XEXP (XEXP (op, 0), 0);
3567 return GET_CODE (op) == SYMBOL_REF;
3570 /* Match exactly zero and one. */
3573 const0_operand (register rtx op, enum machine_mode mode)
3575 return op == CONST0_RTX (mode);
3579 const1_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3581 return op == const1_rtx;
3584 /* Match 2, 4, or 8. Used for leal multiplicands. */
3587 const248_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3589 return (GET_CODE (op) == CONST_INT
3590 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3593 /* True if this is a constant appropriate for an increment or decrement. */
3596 incdec_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3598 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3599 registers, since carry flag is not set. */
3600 if (TARGET_PENTIUM4 && !optimize_size)
3602 return op == const1_rtx || op == constm1_rtx;
3605 /* Return nonzero if OP is acceptable as operand of DImode shift
3609 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3612 return nonimmediate_operand (op, mode);
3614 return register_operand (op, mode);
3617 /* Return false if this is the stack pointer, or any other fake
3618 register eliminable to the stack pointer. Otherwise, this is
3621 This is used to prevent esp from being used as an index reg.
3622 Which would only happen in pathological cases. */
3625 reg_no_sp_operand (register rtx op, enum machine_mode mode)
3628 if (GET_CODE (t) == SUBREG)
3630 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3633 return register_operand (op, mode);
3637 mmx_reg_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3639 return MMX_REG_P (op);
3642 /* Return false if this is any eliminable register. Otherwise
3646 general_no_elim_operand (register rtx op, enum machine_mode mode)
3649 if (GET_CODE (t) == SUBREG)
3651 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3652 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3653 || t == virtual_stack_dynamic_rtx)
3656 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3657 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3660 return general_operand (op, mode);
3663 /* Return false if this is any eliminable register. Otherwise
3664 register_operand or const_int. */
3667 nonmemory_no_elim_operand (register rtx op, enum machine_mode mode)
3670 if (GET_CODE (t) == SUBREG)
3672 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3673 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3674 || t == virtual_stack_dynamic_rtx)
3677 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3680 /* Return false if this is any eliminable register or stack register,
3681 otherwise work like register_operand. */
3684 index_register_operand (register rtx op, enum machine_mode mode)
3687 if (GET_CODE (t) == SUBREG)
3691 if (t == arg_pointer_rtx
3692 || t == frame_pointer_rtx
3693 || t == virtual_incoming_args_rtx
3694 || t == virtual_stack_vars_rtx
3695 || t == virtual_stack_dynamic_rtx
3696 || REGNO (t) == STACK_POINTER_REGNUM)
3699 return general_operand (op, mode);
3702 /* Return true if op is a Q_REGS class register. */
3705 q_regs_operand (register rtx op, enum machine_mode mode)
3707 if (mode != VOIDmode && GET_MODE (op) != mode)
3709 if (GET_CODE (op) == SUBREG)
3710 op = SUBREG_REG (op);
3711 return ANY_QI_REG_P (op);
3714 /* Return true if op is an flags register. */
3717 flags_reg_operand (register rtx op, enum machine_mode mode)
3719 if (mode != VOIDmode && GET_MODE (op) != mode)
3721 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3724 /* Return true if op is a NON_Q_REGS class register. */
3727 non_q_regs_operand (register rtx op, enum machine_mode mode)
3729 if (mode != VOIDmode && GET_MODE (op) != mode)
3731 if (GET_CODE (op) == SUBREG)
3732 op = SUBREG_REG (op);
3733 return NON_QI_REG_P (op);
3737 zero_extended_scalar_load_operand (rtx op,
3738 enum machine_mode mode ATTRIBUTE_UNUSED)
3741 if (GET_CODE (op) != MEM)
3743 op = maybe_get_pool_constant (op);
3746 if (GET_CODE (op) != CONST_VECTOR)
3749 (GET_MODE_SIZE (GET_MODE (op)) /
3750 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3751 for (n_elts--; n_elts > 0; n_elts--)
3753 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3754 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3760 /* Return 1 when OP is operand acceptable for standard SSE move. */
3762 vector_move_operand (rtx op, enum machine_mode mode)
3764 if (nonimmediate_operand (op, mode))
3766 if (GET_MODE (op) != mode && mode != VOIDmode)
3768 return (op == CONST0_RTX (GET_MODE (op)));
3771 /* Return true if op if a valid address, and does not contain
3772 a segment override. */
3775 no_seg_address_operand (register rtx op, enum machine_mode mode)
3777 struct ix86_address parts;
3779 if (! address_operand (op, mode))
3782 if (! ix86_decompose_address (op, &parts))
3785 return parts.seg == SEG_DEFAULT;
3788 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3791 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3793 enum rtx_code code = GET_CODE (op);
3796 /* Operations supported directly. */
3806 /* These are equivalent to ones above in non-IEEE comparisons. */
3813 return !TARGET_IEEE_FP;
3818 /* Return 1 if OP is a valid comparison operator in valid mode. */
3820 ix86_comparison_operator (register rtx op, enum machine_mode mode)
3822 enum machine_mode inmode;
3823 enum rtx_code code = GET_CODE (op);
3824 if (mode != VOIDmode && GET_MODE (op) != mode)
3826 if (GET_RTX_CLASS (code) != '<')
3828 inmode = GET_MODE (XEXP (op, 0));
3830 if (inmode == CCFPmode || inmode == CCFPUmode)
3832 enum rtx_code second_code, bypass_code;
3833 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3834 return (bypass_code == NIL && second_code == NIL);
3841 if (inmode == CCmode || inmode == CCGCmode
3842 || inmode == CCGOCmode || inmode == CCNOmode)
3845 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3846 if (inmode == CCmode)
3850 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3858 /* Return 1 if OP is a valid comparison operator testing carry flag
3861 ix86_carry_flag_operator (register rtx op, enum machine_mode mode)
3863 enum machine_mode inmode;
3864 enum rtx_code code = GET_CODE (op);
3866 if (mode != VOIDmode && GET_MODE (op) != mode)
3868 if (GET_RTX_CLASS (code) != '<')
3870 inmode = GET_MODE (XEXP (op, 0));
3871 if (GET_CODE (XEXP (op, 0)) != REG
3872 || REGNO (XEXP (op, 0)) != 17
3873 || XEXP (op, 1) != const0_rtx)
3876 if (inmode == CCFPmode || inmode == CCFPUmode)
3878 enum rtx_code second_code, bypass_code;
3880 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3881 if (bypass_code != NIL || second_code != NIL)
3883 code = ix86_fp_compare_code_to_integer (code);
3885 else if (inmode != CCmode)
3890 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3893 fcmov_comparison_operator (register rtx op, enum machine_mode mode)
3895 enum machine_mode inmode;
3896 enum rtx_code code = GET_CODE (op);
3898 if (mode != VOIDmode && GET_MODE (op) != mode)
3900 if (GET_RTX_CLASS (code) != '<')
3902 inmode = GET_MODE (XEXP (op, 0));
3903 if (inmode == CCFPmode || inmode == CCFPUmode)
3905 enum rtx_code second_code, bypass_code;
3907 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3908 if (bypass_code != NIL || second_code != NIL)
3910 code = ix86_fp_compare_code_to_integer (code);
3912 /* i387 supports just limited amount of conditional codes. */
3915 case LTU: case GTU: case LEU: case GEU:
3916 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3919 case ORDERED: case UNORDERED:
3927 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3930 promotable_binary_operator (register rtx op,
3931 enum machine_mode mode ATTRIBUTE_UNUSED)
3933 switch (GET_CODE (op))
3936 /* Modern CPUs have same latency for HImode and SImode multiply,
3937 but 386 and 486 do HImode multiply faster. */
3938 return ix86_tune > PROCESSOR_I486;
3950 /* Nearly general operand, but accept any const_double, since we wish
3951 to be able to drop them into memory rather than have them get pulled
3955 cmp_fp_expander_operand (register rtx op, enum machine_mode mode)
3957 if (mode != VOIDmode && mode != GET_MODE (op))
3959 if (GET_CODE (op) == CONST_DOUBLE)
3961 return general_operand (op, mode);
3964 /* Match an SI or HImode register for a zero_extract. */
3967 ext_register_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3970 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3971 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3974 if (!register_operand (op, VOIDmode))
3977 /* Be careful to accept only registers having upper parts. */
3978 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3979 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3982 /* Return 1 if this is a valid binary floating-point operation.
3983 OP is the expression matched, and MODE is its mode. */
3986 binary_fp_operator (register rtx op, enum machine_mode mode)
3988 if (mode != VOIDmode && mode != GET_MODE (op))
3991 switch (GET_CODE (op))
3997 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4005 mult_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4007 return GET_CODE (op) == MULT;
4011 div_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4013 return GET_CODE (op) == DIV;
4017 arith_or_logical_operator (rtx op, enum machine_mode mode)
4019 return ((mode == VOIDmode || GET_MODE (op) == mode)
4020 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4021 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4024 /* Returns 1 if OP is memory operand with a displacement. */
4027 memory_displacement_operand (register rtx op, enum machine_mode mode)
4029 struct ix86_address parts;
4031 if (! memory_operand (op, mode))
4034 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4037 return parts.disp != NULL_RTX;
4040 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4041 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4043 ??? It seems likely that this will only work because cmpsi is an
4044 expander, and no actual insns use this. */
4047 cmpsi_operand (rtx op, enum machine_mode mode)
4049 if (nonimmediate_operand (op, mode))
4052 if (GET_CODE (op) == AND
4053 && GET_MODE (op) == SImode
4054 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4055 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4056 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4057 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4058 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4059 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4065 /* Returns 1 if OP is memory operand that can not be represented by the
4069 long_memory_operand (register rtx op, enum machine_mode mode)
4071 if (! memory_operand (op, mode))
4074 return memory_address_length (op) != 0;
4077 /* Return nonzero if the rtx is known aligned. */
4080 aligned_operand (rtx op, enum machine_mode mode)
4082 struct ix86_address parts;
4084 if (!general_operand (op, mode))
4087 /* Registers and immediate operands are always "aligned". */
4088 if (GET_CODE (op) != MEM)
4091 /* Don't even try to do any aligned optimizations with volatiles. */
4092 if (MEM_VOLATILE_P (op))
4097 /* Pushes and pops are only valid on the stack pointer. */
4098 if (GET_CODE (op) == PRE_DEC
4099 || GET_CODE (op) == POST_INC)
4102 /* Decode the address. */
4103 if (! ix86_decompose_address (op, &parts))
4106 if (parts.base && GET_CODE (parts.base) == SUBREG)
4107 parts.base = SUBREG_REG (parts.base);
4108 if (parts.index && GET_CODE (parts.index) == SUBREG)
4109 parts.index = SUBREG_REG (parts.index);
4111 /* Look for some component that isn't known to be aligned. */
4115 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4120 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4125 if (GET_CODE (parts.disp) != CONST_INT
4126 || (INTVAL (parts.disp) & 3) != 0)
4130 /* Didn't find one -- this must be an aligned address. */
4134 /* Initialize the table of extra 80387 mathematical constants. */
4137 init_ext_80387_constants (void)
4139 static const char * cst[5] =
4141 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4142 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4143 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4144 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4145 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4149 for (i = 0; i < 5; i++)
4151 real_from_string (&ext_80387_constants_table[i], cst[i]);
4152 /* Ensure each constant is rounded to XFmode precision. */
4153 real_convert (&ext_80387_constants_table[i],
4154 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode,
4155 &ext_80387_constants_table[i]);
4158 ext_80387_constants_init = 1;
4161 /* Return true if the constant is something that can be loaded with
4162 a special instruction. */
4165 standard_80387_constant_p (rtx x)
4167 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4170 if (x == CONST0_RTX (GET_MODE (x)))
4172 if (x == CONST1_RTX (GET_MODE (x)))
4175 /* For XFmode constants, try to find a special 80387 instruction on
4176 those CPUs that benefit from them. */
4177 if ((GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)
4178 && x86_ext_80387_constants & TUNEMASK)
4183 if (! ext_80387_constants_init)
4184 init_ext_80387_constants ();
4186 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4187 for (i = 0; i < 5; i++)
4188 if (real_identical (&r, &ext_80387_constants_table[i]))
4195 /* Return the opcode of the special instruction to be used to load
4199 standard_80387_constant_opcode (rtx x)
4201 switch (standard_80387_constant_p (x))
4221 /* Return the CONST_DOUBLE representing the 80387 constant that is
4222 loaded by the specified special instruction. The argument IDX
4223 matches the return value from standard_80387_constant_p. */
4226 standard_80387_constant_rtx (int idx)
4230 if (! ext_80387_constants_init)
4231 init_ext_80387_constants ();
4247 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4248 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode);
4251 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4254 standard_sse_constant_p (rtx x)
4256 if (x == const0_rtx)
4258 return (x == CONST0_RTX (GET_MODE (x)));
4261 /* Returns 1 if OP contains a symbol reference */
4264 symbolic_reference_mentioned_p (rtx op)
4266 register const char *fmt;
4269 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4272 fmt = GET_RTX_FORMAT (GET_CODE (op));
4273 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4279 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4280 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4284 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4291 /* Return 1 if it is appropriate to emit `ret' instructions in the
4292 body of a function. Do this only if the epilogue is simple, needing a
4293 couple of insns. Prior to reloading, we can't tell how many registers
4294 must be saved, so return 0 then. Return 0 if there is no frame
4295 marker to de-allocate.
4297 If NON_SAVING_SETJMP is defined and true, then it is not possible
4298 for the epilogue to be simple, so return 0. This is a special case
4299 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4300 until final, but jump_optimize may need to know sooner if a
4304 ix86_can_use_return_insn_p (void)
4306 struct ix86_frame frame;
4308 #ifdef NON_SAVING_SETJMP
4309 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4313 if (! reload_completed || frame_pointer_needed)
4316 /* Don't allow more than 32 pop, since that's all we can do
4317 with one instruction. */
4318 if (current_function_pops_args
4319 && current_function_args_size >= 32768)
4322 ix86_compute_frame_layout (&frame);
4323 return frame.to_allocate == 0 && frame.nregs == 0;
4326 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4328 x86_64_sign_extended_value (rtx value)
4330 switch (GET_CODE (value))
4332 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4333 to be at least 32 and this all acceptable constants are
4334 represented as CONST_INT. */
4336 if (HOST_BITS_PER_WIDE_INT == 32)
4340 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4341 return trunc_int_for_mode (val, SImode) == val;
4345 /* For certain code models, the symbolic references are known to fit.
4346 in CM_SMALL_PIC model we know it fits if it is local to the shared
4347 library. Don't count TLS SYMBOL_REFs here, since they should fit
4348 only if inside of UNSPEC handled below. */
4350 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4352 /* For certain code models, the code is near as well. */
4354 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4355 || ix86_cmodel == CM_KERNEL);
4357 /* We also may accept the offsetted memory references in certain special
4360 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4361 switch (XINT (XEXP (value, 0), 1))
4363 case UNSPEC_GOTPCREL:
4365 case UNSPEC_GOTNTPOFF:
4371 if (GET_CODE (XEXP (value, 0)) == PLUS)
4373 rtx op1 = XEXP (XEXP (value, 0), 0);
4374 rtx op2 = XEXP (XEXP (value, 0), 1);
4375 HOST_WIDE_INT offset;
4377 if (ix86_cmodel == CM_LARGE)
4379 if (GET_CODE (op2) != CONST_INT)
4381 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4382 switch (GET_CODE (op1))
4385 /* For CM_SMALL assume that latest object is 16MB before
4386 end of 31bits boundary. We may also accept pretty
4387 large negative constants knowing that all objects are
4388 in the positive half of address space. */
4389 if (ix86_cmodel == CM_SMALL
4390 && offset < 16*1024*1024
4391 && trunc_int_for_mode (offset, SImode) == offset)
4393 /* For CM_KERNEL we know that all object resist in the
4394 negative half of 32bits address space. We may not
4395 accept negative offsets, since they may be just off
4396 and we may accept pretty large positive ones. */
4397 if (ix86_cmodel == CM_KERNEL
4399 && trunc_int_for_mode (offset, SImode) == offset)
4403 /* These conditions are similar to SYMBOL_REF ones, just the
4404 constraints for code models differ. */
4405 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4406 && offset < 16*1024*1024
4407 && trunc_int_for_mode (offset, SImode) == offset)
4409 if (ix86_cmodel == CM_KERNEL
4411 && trunc_int_for_mode (offset, SImode) == offset)
4415 switch (XINT (op1, 1))
4420 && trunc_int_for_mode (offset, SImode) == offset)
4434 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4436 x86_64_zero_extended_value (rtx value)
4438 switch (GET_CODE (value))
4441 if (HOST_BITS_PER_WIDE_INT == 32)
4442 return (GET_MODE (value) == VOIDmode
4443 && !CONST_DOUBLE_HIGH (value));
4447 if (HOST_BITS_PER_WIDE_INT == 32)
4448 return INTVAL (value) >= 0;
4450 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4453 /* For certain code models, the symbolic references are known to fit. */
4455 return ix86_cmodel == CM_SMALL;
4457 /* For certain code models, the code is near as well. */
4459 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4461 /* We also may accept the offsetted memory references in certain special
4464 if (GET_CODE (XEXP (value, 0)) == PLUS)
4466 rtx op1 = XEXP (XEXP (value, 0), 0);
4467 rtx op2 = XEXP (XEXP (value, 0), 1);
4469 if (ix86_cmodel == CM_LARGE)
4471 switch (GET_CODE (op1))
4475 /* For small code model we may accept pretty large positive
4476 offsets, since one bit is available for free. Negative
4477 offsets are limited by the size of NULL pointer area
4478 specified by the ABI. */
4479 if (ix86_cmodel == CM_SMALL
4480 && GET_CODE (op2) == CONST_INT
4481 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4482 && (trunc_int_for_mode (INTVAL (op2), SImode)
4485 /* ??? For the kernel, we may accept adjustment of
4486 -0x10000000, since we know that it will just convert
4487 negative address space to positive, but perhaps this
4488 is not worthwhile. */
4491 /* These conditions are similar to SYMBOL_REF ones, just the
4492 constraints for code models differ. */
4493 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4494 && GET_CODE (op2) == CONST_INT
4495 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4496 && (trunc_int_for_mode (INTVAL (op2), SImode)
4510 /* Value should be nonzero if functions must have frame pointers.
4511 Zero means the frame pointer need not be set up (and parms may
4512 be accessed via the stack pointer) in functions that seem suitable. */
4515 ix86_frame_pointer_required (void)
4517 /* If we accessed previous frames, then the generated code expects
4518 to be able to access the saved ebp value in our frame. */
4519 if (cfun->machine->accesses_prev_frame)
4522 /* Several x86 os'es need a frame pointer for other reasons,
4523 usually pertaining to setjmp. */
4524 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4527 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4528 the frame pointer by default. Turn it back on now if we've not
4529 got a leaf function. */
4530 if (TARGET_OMIT_LEAF_FRAME_POINTER
4531 && (!current_function_is_leaf))
4534 if (current_function_profile)
4540 /* Record that the current function accesses previous call frames. */
4543 ix86_setup_frame_addresses (void)
4545 cfun->machine->accesses_prev_frame = 1;
4548 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4549 # define USE_HIDDEN_LINKONCE 1
4551 # define USE_HIDDEN_LINKONCE 0
4554 static int pic_labels_used;
4556 /* Fills in the label name that should be used for a pc thunk for
4557 the given register. */
4560 get_pc_thunk_name (char name[32], unsigned int regno)
4562 if (USE_HIDDEN_LINKONCE)
4563 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4565 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4569 /* This function generates code for -fpic that loads %ebx with
4570 the return address of the caller and then returns. */
4573 ix86_file_end (void)
4578 for (regno = 0; regno < 8; ++regno)
4582 if (! ((pic_labels_used >> regno) & 1))
4585 get_pc_thunk_name (name, regno);
4587 if (USE_HIDDEN_LINKONCE)
4591 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4593 TREE_PUBLIC (decl) = 1;
4594 TREE_STATIC (decl) = 1;
4595 DECL_ONE_ONLY (decl) = 1;
4597 (*targetm.asm_out.unique_section) (decl, 0);
4598 named_section (decl, NULL, 0);
4600 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4601 fputs ("\t.hidden\t", asm_out_file);
4602 assemble_name (asm_out_file, name);
4603 fputc ('\n', asm_out_file);
4604 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4609 ASM_OUTPUT_LABEL (asm_out_file, name);
4612 xops[0] = gen_rtx_REG (SImode, regno);
4613 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4614 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4615 output_asm_insn ("ret", xops);
4618 if (NEED_INDICATE_EXEC_STACK)
4619 file_end_indicate_exec_stack ();
4622 /* Emit code for the SET_GOT patterns. */
4625 output_set_got (rtx dest)
4630 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4632 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4634 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4637 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4639 output_asm_insn ("call\t%a2", xops);
4642 /* Output the "canonical" label name ("Lxx$pb") here too. This
4643 is what will be referred to by the Mach-O PIC subsystem. */
4644 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4646 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4647 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4650 output_asm_insn ("pop{l}\t%0", xops);
4655 get_pc_thunk_name (name, REGNO (dest));
4656 pic_labels_used |= 1 << REGNO (dest);
4658 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4659 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4660 output_asm_insn ("call\t%X2", xops);
4663 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4664 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4665 else if (!TARGET_MACHO)
4666 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4671 /* Generate an "push" pattern for input ARG. */
4676 return gen_rtx_SET (VOIDmode,
4678 gen_rtx_PRE_DEC (Pmode,
4679 stack_pointer_rtx)),
4683 /* Return >= 0 if there is an unused call-clobbered register available
4684 for the entire function. */
4687 ix86_select_alt_pic_regnum (void)
4689 if (current_function_is_leaf && !current_function_profile)
4692 for (i = 2; i >= 0; --i)
4693 if (!regs_ever_live[i])
4697 return INVALID_REGNUM;
4700 /* Return 1 if we need to save REGNO. */
4702 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4704 if (pic_offset_table_rtx
4705 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4706 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4707 || current_function_profile
4708 || current_function_calls_eh_return
4709 || current_function_uses_const_pool))
4711 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4716 if (current_function_calls_eh_return && maybe_eh_return)
4721 unsigned test = EH_RETURN_DATA_REGNO (i);
4722 if (test == INVALID_REGNUM)
4729 return (regs_ever_live[regno]
4730 && !call_used_regs[regno]
4731 && !fixed_regs[regno]
4732 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4735 /* Return number of registers to be saved on the stack. */
4738 ix86_nsaved_regs (void)
4743 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4744 if (ix86_save_reg (regno, true))
4749 /* Return the offset between two registers, one to be eliminated, and the other
4750 its replacement, at the start of a routine. */
4753 ix86_initial_elimination_offset (int from, int to)
4755 struct ix86_frame frame;
4756 ix86_compute_frame_layout (&frame);
4758 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4759 return frame.hard_frame_pointer_offset;
4760 else if (from == FRAME_POINTER_REGNUM
4761 && to == HARD_FRAME_POINTER_REGNUM)
4762 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4765 if (to != STACK_POINTER_REGNUM)
4767 else if (from == ARG_POINTER_REGNUM)
4768 return frame.stack_pointer_offset;
4769 else if (from != FRAME_POINTER_REGNUM)
4772 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4776 /* Fill structure ix86_frame about frame of currently computed function. */
4779 ix86_compute_frame_layout (struct ix86_frame *frame)
4781 HOST_WIDE_INT total_size;
4782 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4784 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4785 HOST_WIDE_INT size = get_frame_size ();
4787 frame->nregs = ix86_nsaved_regs ();
4790 /* During reload iteration the amount of registers saved can change.
4791 Recompute the value as needed. Do not recompute when amount of registers
4792 didn't change as reload does mutiple calls to the function and does not
4793 expect the decision to change within single iteration. */
4795 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4797 int count = frame->nregs;
4799 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4800 /* The fast prologue uses move instead of push to save registers. This
4801 is significantly longer, but also executes faster as modern hardware
4802 can execute the moves in parallel, but can't do that for push/pop.
4804 Be careful about choosing what prologue to emit: When function takes
4805 many instructions to execute we may use slow version as well as in
4806 case function is known to be outside hot spot (this is known with
4807 feedback only). Weight the size of function by number of registers
4808 to save as it is cheap to use one or two push instructions but very
4809 slow to use many of them. */
4811 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4812 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4813 || (flag_branch_probabilities
4814 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4815 cfun->machine->use_fast_prologue_epilogue = false;
4817 cfun->machine->use_fast_prologue_epilogue
4818 = !expensive_function_p (count);
4820 if (TARGET_PROLOGUE_USING_MOVE
4821 && cfun->machine->use_fast_prologue_epilogue)
4822 frame->save_regs_using_mov = true;
4824 frame->save_regs_using_mov = false;
4827 /* Skip return address and saved base pointer. */
4828 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4830 frame->hard_frame_pointer_offset = offset;
4832 /* Do some sanity checking of stack_alignment_needed and
4833 preferred_alignment, since i386 port is the only using those features
4834 that may break easily. */
4836 if (size && !stack_alignment_needed)
4838 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4840 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4842 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4845 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4846 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4848 /* Register save area */
4849 offset += frame->nregs * UNITS_PER_WORD;
4852 if (ix86_save_varrargs_registers)
4854 offset += X86_64_VARARGS_SIZE;
4855 frame->va_arg_size = X86_64_VARARGS_SIZE;
4858 frame->va_arg_size = 0;
4860 /* Align start of frame for local function. */
4861 frame->padding1 = ((offset + stack_alignment_needed - 1)
4862 & -stack_alignment_needed) - offset;
4864 offset += frame->padding1;
4866 /* Frame pointer points here. */
4867 frame->frame_pointer_offset = offset;
4871 /* Add outgoing arguments area. Can be skipped if we eliminated
4872 all the function calls as dead code. */
4873 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4875 offset += current_function_outgoing_args_size;
4876 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4879 frame->outgoing_arguments_size = 0;
4881 /* Align stack boundary. Only needed if we're calling another function
4883 if (!current_function_is_leaf || current_function_calls_alloca)
4884 frame->padding2 = ((offset + preferred_alignment - 1)
4885 & -preferred_alignment) - offset;
4887 frame->padding2 = 0;
4889 offset += frame->padding2;
4891 /* We've reached end of stack frame. */
4892 frame->stack_pointer_offset = offset;
4894 /* Size prologue needs to allocate. */
4895 frame->to_allocate =
4896 (size + frame->padding1 + frame->padding2
4897 + frame->outgoing_arguments_size + frame->va_arg_size);
4899 if (!frame->to_allocate && frame->nregs <= 1)
4900 frame->save_regs_using_mov = false;
4902 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4903 && current_function_is_leaf)
4905 frame->red_zone_size = frame->to_allocate;
4906 if (frame->save_regs_using_mov)
4907 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4908 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4909 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4912 frame->red_zone_size = 0;
4913 frame->to_allocate -= frame->red_zone_size;
4914 frame->stack_pointer_offset -= frame->red_zone_size;
4916 fprintf (stderr, "nregs: %i\n", frame->nregs);
4917 fprintf (stderr, "size: %i\n", size);
4918 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4919 fprintf (stderr, "padding1: %i\n", frame->padding1);
4920 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4921 fprintf (stderr, "padding2: %i\n", frame->padding2);
4922 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4923 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4924 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4925 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4926 frame->hard_frame_pointer_offset);
4927 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4931 /* Emit code to save registers in the prologue. */
4934 ix86_emit_save_regs (void)
4939 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4940 if (ix86_save_reg (regno, true))
4942 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4943 RTX_FRAME_RELATED_P (insn) = 1;
4947 /* Emit code to save registers using MOV insns. First register
4948 is restored from POINTER + OFFSET. */
4950 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4955 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4956 if (ix86_save_reg (regno, true))
4958 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4960 gen_rtx_REG (Pmode, regno));
4961 RTX_FRAME_RELATED_P (insn) = 1;
4962 offset += UNITS_PER_WORD;
4966 /* Expand the prologue into a bunch of separate insns. */
4969 ix86_expand_prologue (void)
4973 struct ix86_frame frame;
4974 HOST_WIDE_INT allocate;
4976 ix86_compute_frame_layout (&frame);
4978 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4979 slower on all targets. Also sdb doesn't like it. */
4981 if (frame_pointer_needed)
4983 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4984 RTX_FRAME_RELATED_P (insn) = 1;
4986 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4987 RTX_FRAME_RELATED_P (insn) = 1;
4990 allocate = frame.to_allocate;
4992 if (!frame.save_regs_using_mov)
4993 ix86_emit_save_regs ();
4995 allocate += frame.nregs * UNITS_PER_WORD;
4997 /* When using red zone we may start register saving before allocating
4998 the stack frame saving one cycle of the prologue. */
4999 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5000 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5001 : stack_pointer_rtx,
5002 -frame.nregs * UNITS_PER_WORD);
5006 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5008 insn = emit_insn (gen_pro_epilogue_adjust_stack
5009 (stack_pointer_rtx, stack_pointer_rtx,
5010 GEN_INT (-allocate)));
5011 RTX_FRAME_RELATED_P (insn) = 1;
5015 /* ??? Is this only valid for Win32? */
5022 arg0 = gen_rtx_REG (SImode, 0);
5023 emit_move_insn (arg0, GEN_INT (allocate));
5025 sym = gen_rtx_MEM (FUNCTION_MODE,
5026 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
5027 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5029 CALL_INSN_FUNCTION_USAGE (insn)
5030 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5031 CALL_INSN_FUNCTION_USAGE (insn));
5033 /* Don't allow scheduling pass to move insns across __alloca
5035 emit_insn (gen_blockage (const0_rtx));
5037 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5039 if (!frame_pointer_needed || !frame.to_allocate)
5040 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5042 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5043 -frame.nregs * UNITS_PER_WORD);
5046 pic_reg_used = false;
5047 if (pic_offset_table_rtx
5048 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5049 || current_function_profile))
5051 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5053 if (alt_pic_reg_used != INVALID_REGNUM)
5054 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5056 pic_reg_used = true;
5061 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5063 /* Even with accurate pre-reload life analysis, we can wind up
5064 deleting all references to the pic register after reload.
5065 Consider if cross-jumping unifies two sides of a branch
5066 controlled by a comparison vs the only read from a global.
5067 In which case, allow the set_got to be deleted, though we're
5068 too late to do anything about the ebx save in the prologue. */
5069 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5072 /* Prevent function calls from be scheduled before the call to mcount.
5073 In the pic_reg_used case, make sure that the got load isn't deleted. */
5074 if (current_function_profile)
5075 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5078 /* Emit code to restore saved registers using MOV insns. First register
5079 is restored from POINTER + OFFSET. */
5081 ix86_emit_restore_regs_using_mov (rtx pointer, int offset, int maybe_eh_return)
5085 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5086 if (ix86_save_reg (regno, maybe_eh_return))
5088 emit_move_insn (gen_rtx_REG (Pmode, regno),
5089 adjust_address (gen_rtx_MEM (Pmode, pointer),
5091 offset += UNITS_PER_WORD;
5095 /* Restore function stack, frame, and registers. */
5098 ix86_expand_epilogue (int style)
5101 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5102 struct ix86_frame frame;
5103 HOST_WIDE_INT offset;
5105 ix86_compute_frame_layout (&frame);
5107 /* Calculate start of saved registers relative to ebp. Special care
5108 must be taken for the normal return case of a function using
5109 eh_return: the eax and edx registers are marked as saved, but not
5110 restored along this path. */
5111 offset = frame.nregs;
5112 if (current_function_calls_eh_return && style != 2)
5114 offset *= -UNITS_PER_WORD;
5116 /* If we're only restoring one register and sp is not valid then
5117 using a move instruction to restore the register since it's
5118 less work than reloading sp and popping the register.
5120 The default code result in stack adjustment using add/lea instruction,
5121 while this code results in LEAVE instruction (or discrete equivalent),
5122 so it is profitable in some other cases as well. Especially when there
5123 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5124 and there is exactly one register to pop. This heuristic may need some
5125 tuning in future. */
5126 if ((!sp_valid && frame.nregs <= 1)
5127 || (TARGET_EPILOGUE_USING_MOVE
5128 && cfun->machine->use_fast_prologue_epilogue
5129 && (frame.nregs > 1 || frame.to_allocate))
5130 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5131 || (frame_pointer_needed && TARGET_USE_LEAVE
5132 && cfun->machine->use_fast_prologue_epilogue
5133 && frame.nregs == 1)
5134 || current_function_calls_eh_return)
5136 /* Restore registers. We can use ebp or esp to address the memory
5137 locations. If both are available, default to ebp, since offsets
5138 are known to be small. Only exception is esp pointing directly to the
5139 end of block of saved registers, where we may simplify addressing
5142 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5143 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5144 frame.to_allocate, style == 2);
5146 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5147 offset, style == 2);
5149 /* eh_return epilogues need %ecx added to the stack pointer. */
5152 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5154 if (frame_pointer_needed)
5156 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5157 tmp = plus_constant (tmp, UNITS_PER_WORD);
5158 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5160 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5161 emit_move_insn (hard_frame_pointer_rtx, tmp);
5163 emit_insn (gen_pro_epilogue_adjust_stack
5164 (stack_pointer_rtx, sa, const0_rtx));
5168 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5169 tmp = plus_constant (tmp, (frame.to_allocate
5170 + frame.nregs * UNITS_PER_WORD));
5171 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5174 else if (!frame_pointer_needed)
5175 emit_insn (gen_pro_epilogue_adjust_stack
5176 (stack_pointer_rtx, stack_pointer_rtx,
5177 GEN_INT (frame.to_allocate
5178 + frame.nregs * UNITS_PER_WORD)));
5179 /* If not an i386, mov & pop is faster than "leave". */
5180 else if (TARGET_USE_LEAVE || optimize_size
5181 || !cfun->machine->use_fast_prologue_epilogue)
5182 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5185 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5186 hard_frame_pointer_rtx,
5189 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5191 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5196 /* First step is to deallocate the stack frame so that we can
5197 pop the registers. */
5200 if (!frame_pointer_needed)
5202 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5203 hard_frame_pointer_rtx,
5206 else if (frame.to_allocate)
5207 emit_insn (gen_pro_epilogue_adjust_stack
5208 (stack_pointer_rtx, stack_pointer_rtx,
5209 GEN_INT (frame.to_allocate)));
5211 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5212 if (ix86_save_reg (regno, false))
5215 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5217 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5219 if (frame_pointer_needed)
5221 /* Leave results in shorter dependency chains on CPUs that are
5222 able to grok it fast. */
5223 if (TARGET_USE_LEAVE)
5224 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5225 else if (TARGET_64BIT)
5226 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5228 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5232 /* Sibcall epilogues don't want a return instruction. */
5236 if (current_function_pops_args && current_function_args_size)
5238 rtx popc = GEN_INT (current_function_pops_args);
5240 /* i386 can only pop 64K bytes. If asked to pop more, pop
5241 return address, do explicit add, and jump indirectly to the
5244 if (current_function_pops_args >= 65536)
5246 rtx ecx = gen_rtx_REG (SImode, 2);
5248 /* There are is no "pascal" calling convention in 64bit ABI. */
5252 emit_insn (gen_popsi1 (ecx));
5253 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5254 emit_jump_insn (gen_return_indirect_internal (ecx));
5257 emit_jump_insn (gen_return_pop_internal (popc));
5260 emit_jump_insn (gen_return_internal ());
5263 /* Reset from the function's potential modifications. */
5266 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5267 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5269 if (pic_offset_table_rtx)
5270 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5273 /* Extract the parts of an RTL expression that is a valid memory address
5274 for an instruction. Return 0 if the structure of the address is
5275 grossly off. Return -1 if the address contains ASHIFT, so it is not
5276 strictly valid, but still used for computing length of lea instruction. */
5279 ix86_decompose_address (register rtx addr, struct ix86_address *out)
5281 rtx base = NULL_RTX;
5282 rtx index = NULL_RTX;
5283 rtx disp = NULL_RTX;
5284 HOST_WIDE_INT scale = 1;
5285 rtx scale_rtx = NULL_RTX;
5287 enum ix86_address_seg seg = SEG_DEFAULT;
5289 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5291 else if (GET_CODE (addr) == PLUS)
5301 addends[n++] = XEXP (op, 1);
5304 while (GET_CODE (op) == PLUS);
5309 for (i = n; i >= 0; --i)
5312 switch (GET_CODE (op))
5317 index = XEXP (op, 0);
5318 scale_rtx = XEXP (op, 1);
5322 if (XINT (op, 1) == UNSPEC_TP
5323 && TARGET_TLS_DIRECT_SEG_REFS
5324 && seg == SEG_DEFAULT)
5325 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5354 else if (GET_CODE (addr) == MULT)
5356 index = XEXP (addr, 0); /* index*scale */
5357 scale_rtx = XEXP (addr, 1);
5359 else if (GET_CODE (addr) == ASHIFT)
5363 /* We're called for lea too, which implements ashift on occasion. */
5364 index = XEXP (addr, 0);
5365 tmp = XEXP (addr, 1);
5366 if (GET_CODE (tmp) != CONST_INT)
5368 scale = INTVAL (tmp);
5369 if ((unsigned HOST_WIDE_INT) scale > 3)
5375 disp = addr; /* displacement */
5377 /* Extract the integral value of scale. */
5380 if (GET_CODE (scale_rtx) != CONST_INT)
5382 scale = INTVAL (scale_rtx);
5385 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5386 if (base && index && scale == 1
5387 && (index == arg_pointer_rtx
5388 || index == frame_pointer_rtx
5389 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5396 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5397 if ((base == hard_frame_pointer_rtx
5398 || base == frame_pointer_rtx
5399 || base == arg_pointer_rtx) && !disp)
5402 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5403 Avoid this by transforming to [%esi+0]. */
5404 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5405 && base && !index && !disp
5407 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5410 /* Special case: encode reg+reg instead of reg*2. */
5411 if (!base && index && scale && scale == 2)
5412 base = index, scale = 1;
5414 /* Special case: scaling cannot be encoded without base or displacement. */
5415 if (!base && !disp && index && scale != 1)
5427 /* Return cost of the memory address x.
5428 For i386, it is better to use a complex address than let gcc copy
5429 the address into a reg and make a new pseudo. But not if the address
5430 requires to two regs - that would mean more pseudos with longer
5433 ix86_address_cost (rtx x)
5435 struct ix86_address parts;
5438 if (!ix86_decompose_address (x, &parts))
5441 if (parts.base && GET_CODE (parts.base) == SUBREG)
5442 parts.base = SUBREG_REG (parts.base);
5443 if (parts.index && GET_CODE (parts.index) == SUBREG)
5444 parts.index = SUBREG_REG (parts.index);
5446 /* More complex memory references are better. */
5447 if (parts.disp && parts.disp != const0_rtx)
5449 if (parts.seg != SEG_DEFAULT)
5452 /* Attempt to minimize number of registers in the address. */
5454 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5456 && (!REG_P (parts.index)
5457 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5461 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5463 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5464 && parts.base != parts.index)
5467 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5468 since it's predecode logic can't detect the length of instructions
5469 and it degenerates to vector decoded. Increase cost of such
5470 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5471 to split such addresses or even refuse such addresses at all.
5473 Following addressing modes are affected:
5478 The first and last case may be avoidable by explicitly coding the zero in
5479 memory address, but I don't have AMD-K6 machine handy to check this
5483 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5484 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5485 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5491 /* If X is a machine specific address (i.e. a symbol or label being
5492 referenced as a displacement from the GOT implemented using an
5493 UNSPEC), then return the base term. Otherwise return X. */
5496 ix86_find_base_term (rtx x)
5502 if (GET_CODE (x) != CONST)
5505 if (GET_CODE (term) == PLUS
5506 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5507 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5508 term = XEXP (term, 0);
5509 if (GET_CODE (term) != UNSPEC
5510 || XINT (term, 1) != UNSPEC_GOTPCREL)
5513 term = XVECEXP (term, 0, 0);
5515 if (GET_CODE (term) != SYMBOL_REF
5516 && GET_CODE (term) != LABEL_REF)
5522 term = ix86_delegitimize_address (x);
5524 if (GET_CODE (term) != SYMBOL_REF
5525 && GET_CODE (term) != LABEL_REF)
5531 /* Determine if a given RTX is a valid constant. We already know this
5532 satisfies CONSTANT_P. */
5535 legitimate_constant_p (rtx x)
5539 switch (GET_CODE (x))
5542 /* TLS symbols are not constant. */
5543 if (tls_symbolic_operand (x, Pmode))
5548 inner = XEXP (x, 0);
5550 /* Offsets of TLS symbols are never valid.
5551 Discourage CSE from creating them. */
5552 if (GET_CODE (inner) == PLUS
5553 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5556 if (GET_CODE (inner) == PLUS)
5558 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5560 inner = XEXP (inner, 0);
5563 /* Only some unspecs are valid as "constants". */
5564 if (GET_CODE (inner) == UNSPEC)
5565 switch (XINT (inner, 1))
5569 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5571 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5581 /* Otherwise we handle everything else in the move patterns. */
5585 /* Determine if it's legal to put X into the constant pool. This
5586 is not possible for the address of thread-local symbols, which
5587 is checked above. */
5590 ix86_cannot_force_const_mem (rtx x)
5592 return !legitimate_constant_p (x);
5595 /* Determine if a given RTX is a valid constant address. */
5598 constant_address_p (rtx x)
5600 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5603 /* Nonzero if the constant value X is a legitimate general operand
5604 when generating PIC code. It is given that flag_pic is on and
5605 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5608 legitimate_pic_operand_p (rtx x)
5612 switch (GET_CODE (x))
5615 inner = XEXP (x, 0);
5617 /* Only some unspecs are valid as "constants". */
5618 if (GET_CODE (inner) == UNSPEC)
5619 switch (XINT (inner, 1))
5622 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5630 return legitimate_pic_address_disp_p (x);
5637 /* Determine if a given CONST RTX is a valid memory displacement
5641 legitimate_pic_address_disp_p (register rtx disp)
5645 /* In 64bit mode we can allow direct addresses of symbols and labels
5646 when they are not dynamic symbols. */
5649 /* TLS references should always be enclosed in UNSPEC. */
5650 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5652 if (GET_CODE (disp) == SYMBOL_REF
5653 && ix86_cmodel == CM_SMALL_PIC
5654 && SYMBOL_REF_LOCAL_P (disp))
5656 if (GET_CODE (disp) == LABEL_REF)
5658 if (GET_CODE (disp) == CONST
5659 && GET_CODE (XEXP (disp, 0)) == PLUS
5660 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5661 && ix86_cmodel == CM_SMALL_PIC
5662 && SYMBOL_REF_LOCAL_P (XEXP (XEXP (disp, 0), 0)))
5663 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5664 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5665 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5666 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5669 if (GET_CODE (disp) != CONST)
5671 disp = XEXP (disp, 0);
5675 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5676 of GOT tables. We should not need these anyway. */
5677 if (GET_CODE (disp) != UNSPEC
5678 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5681 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5682 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5688 if (GET_CODE (disp) == PLUS)
5690 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5692 disp = XEXP (disp, 0);
5696 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5697 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5699 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5700 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5701 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5703 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5704 if (strstr (sym_name, "$pb") != 0)
5709 if (GET_CODE (disp) != UNSPEC)
5712 switch (XINT (disp, 1))
5717 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5719 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5720 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5721 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5723 case UNSPEC_GOTTPOFF:
5724 case UNSPEC_GOTNTPOFF:
5725 case UNSPEC_INDNTPOFF:
5728 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5730 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5732 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5738 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5739 memory address for an instruction. The MODE argument is the machine mode
5740 for the MEM expression that wants to use this address.
5742 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5743 convert common non-canonical forms to canonical form so that they will
5747 legitimate_address_p (enum machine_mode mode, register rtx addr, int strict)
5749 struct ix86_address parts;
5750 rtx base, index, disp;
5751 HOST_WIDE_INT scale;
5752 const char *reason = NULL;
5753 rtx reason_rtx = NULL_RTX;
5755 if (TARGET_DEBUG_ADDR)
5758 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5759 GET_MODE_NAME (mode), strict);
5763 if (ix86_decompose_address (addr, &parts) <= 0)
5765 reason = "decomposition failed";
5770 index = parts.index;
5772 scale = parts.scale;
5774 /* Validate base register.
5776 Don't allow SUBREG's here, it can lead to spill failures when the base
5777 is one word out of a two word structure, which is represented internally
5785 if (GET_CODE (base) == SUBREG)
5786 reg = SUBREG_REG (base);
5790 if (GET_CODE (reg) != REG)
5792 reason = "base is not a register";
5796 if (GET_MODE (base) != Pmode)
5798 reason = "base is not in Pmode";
5802 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5803 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5805 reason = "base is not valid";
5810 /* Validate index register.
5812 Don't allow SUBREG's here, it can lead to spill failures when the index
5813 is one word out of a two word structure, which is represented internally
5821 if (GET_CODE (index) == SUBREG)
5822 reg = SUBREG_REG (index);
5826 if (GET_CODE (reg) != REG)
5828 reason = "index is not a register";
5832 if (GET_MODE (index) != Pmode)
5834 reason = "index is not in Pmode";
5838 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5839 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5841 reason = "index is not valid";
5846 /* Validate scale factor. */
5849 reason_rtx = GEN_INT (scale);
5852 reason = "scale without index";
5856 if (scale != 2 && scale != 4 && scale != 8)
5858 reason = "scale is not a valid multiplier";
5863 /* Validate displacement. */
5868 if (GET_CODE (disp) == CONST
5869 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5870 switch (XINT (XEXP (disp, 0), 1))
5874 case UNSPEC_GOTPCREL:
5877 goto is_legitimate_pic;
5879 case UNSPEC_GOTTPOFF:
5880 case UNSPEC_GOTNTPOFF:
5881 case UNSPEC_INDNTPOFF:
5887 reason = "invalid address unspec";
5891 else if (flag_pic && (SYMBOLIC_CONST (disp)
5893 && !machopic_operand_p (disp)
5898 if (TARGET_64BIT && (index || base))
5900 /* foo@dtpoff(%rX) is ok. */
5901 if (GET_CODE (disp) != CONST
5902 || GET_CODE (XEXP (disp, 0)) != PLUS
5903 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5904 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5905 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5906 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5908 reason = "non-constant pic memory reference";
5912 else if (! legitimate_pic_address_disp_p (disp))
5914 reason = "displacement is an invalid pic construct";
5918 /* This code used to verify that a symbolic pic displacement
5919 includes the pic_offset_table_rtx register.
5921 While this is good idea, unfortunately these constructs may
5922 be created by "adds using lea" optimization for incorrect
5931 This code is nonsensical, but results in addressing
5932 GOT table with pic_offset_table_rtx base. We can't
5933 just refuse it easily, since it gets matched by
5934 "addsi3" pattern, that later gets split to lea in the
5935 case output register differs from input. While this
5936 can be handled by separate addsi pattern for this case
5937 that never results in lea, this seems to be easier and
5938 correct fix for crash to disable this test. */
5940 else if (GET_CODE (disp) != LABEL_REF
5941 && GET_CODE (disp) != CONST_INT
5942 && (GET_CODE (disp) != CONST
5943 || !legitimate_constant_p (disp))
5944 && (GET_CODE (disp) != SYMBOL_REF
5945 || !legitimate_constant_p (disp)))
5947 reason = "displacement is not constant";
5950 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5952 reason = "displacement is out of range";
5957 /* Everything looks valid. */
5958 if (TARGET_DEBUG_ADDR)
5959 fprintf (stderr, "Success.\n");
5963 if (TARGET_DEBUG_ADDR)
5965 fprintf (stderr, "Error: %s\n", reason);
5966 debug_rtx (reason_rtx);
5971 /* Return an unique alias set for the GOT. */
5973 static HOST_WIDE_INT
5974 ix86_GOT_alias_set (void)
5976 static HOST_WIDE_INT set = -1;
5978 set = new_alias_set ();
5982 /* Return a legitimate reference for ORIG (an address) using the
5983 register REG. If REG is 0, a new pseudo is generated.
5985 There are two types of references that must be handled:
5987 1. Global data references must load the address from the GOT, via
5988 the PIC reg. An insn is emitted to do this load, and the reg is
5991 2. Static data references, constant pool addresses, and code labels
5992 compute the address as an offset from the GOT, whose base is in
5993 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5994 differentiate them from global data objects. The returned
5995 address is the PIC reg + an unspec constant.
5997 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5998 reg also appears in the address. */
6001 legitimize_pic_address (rtx orig, rtx reg)
6009 reg = gen_reg_rtx (Pmode);
6010 /* Use the generic Mach-O PIC machinery. */
6011 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6014 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6016 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6018 /* This symbol may be referenced via a displacement from the PIC
6019 base address (@GOTOFF). */
6021 if (reload_in_progress)
6022 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6023 if (GET_CODE (addr) == CONST)
6024 addr = XEXP (addr, 0);
6025 if (GET_CODE (addr) == PLUS)
6027 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6028 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6031 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6032 new = gen_rtx_CONST (Pmode, new);
6033 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6037 emit_move_insn (reg, new);
6041 else if (GET_CODE (addr) == SYMBOL_REF)
6045 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6046 new = gen_rtx_CONST (Pmode, new);
6047 new = gen_rtx_MEM (Pmode, new);
6048 RTX_UNCHANGING_P (new) = 1;
6049 set_mem_alias_set (new, ix86_GOT_alias_set ());
6052 reg = gen_reg_rtx (Pmode);
6053 /* Use directly gen_movsi, otherwise the address is loaded
6054 into register for CSE. We don't want to CSE this addresses,
6055 instead we CSE addresses from the GOT table, so skip this. */
6056 emit_insn (gen_movsi (reg, new));
6061 /* This symbol must be referenced via a load from the
6062 Global Offset Table (@GOT). */
6064 if (reload_in_progress)
6065 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6066 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6067 new = gen_rtx_CONST (Pmode, new);
6068 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6069 new = gen_rtx_MEM (Pmode, new);
6070 RTX_UNCHANGING_P (new) = 1;
6071 set_mem_alias_set (new, ix86_GOT_alias_set ());
6074 reg = gen_reg_rtx (Pmode);
6075 emit_move_insn (reg, new);
6081 if (GET_CODE (addr) == CONST)
6083 addr = XEXP (addr, 0);
6085 /* We must match stuff we generate before. Assume the only
6086 unspecs that can get here are ours. Not that we could do
6087 anything with them anyway... */
6088 if (GET_CODE (addr) == UNSPEC
6089 || (GET_CODE (addr) == PLUS
6090 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6092 if (GET_CODE (addr) != PLUS)
6095 if (GET_CODE (addr) == PLUS)
6097 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6099 /* Check first to see if this is a constant offset from a @GOTOFF
6100 symbol reference. */
6101 if (local_symbolic_operand (op0, Pmode)
6102 && GET_CODE (op1) == CONST_INT)
6106 if (reload_in_progress)
6107 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6108 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6110 new = gen_rtx_PLUS (Pmode, new, op1);
6111 new = gen_rtx_CONST (Pmode, new);
6112 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6116 emit_move_insn (reg, new);
6122 if (INTVAL (op1) < -16*1024*1024
6123 || INTVAL (op1) >= 16*1024*1024)
6124 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6129 base = legitimize_pic_address (XEXP (addr, 0), reg);
6130 new = legitimize_pic_address (XEXP (addr, 1),
6131 base == reg ? NULL_RTX : reg);
6133 if (GET_CODE (new) == CONST_INT)
6134 new = plus_constant (base, INTVAL (new));
6137 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6139 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6140 new = XEXP (new, 1);
6142 new = gen_rtx_PLUS (Pmode, base, new);
6150 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6153 get_thread_pointer (int to_reg)
6157 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6161 reg = gen_reg_rtx (Pmode);
6162 insn = gen_rtx_SET (VOIDmode, reg, tp);
6163 insn = emit_insn (insn);
6168 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6169 false if we expect this to be used for a memory address and true if
6170 we expect to load the address into a register. */
6173 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6175 rtx dest, base, off, pic;
6180 case TLS_MODEL_GLOBAL_DYNAMIC:
6181 dest = gen_reg_rtx (Pmode);
6184 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6187 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6188 insns = get_insns ();
6191 emit_libcall_block (insns, dest, rax, x);
6194 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6197 case TLS_MODEL_LOCAL_DYNAMIC:
6198 base = gen_reg_rtx (Pmode);
6201 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6204 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6205 insns = get_insns ();
6208 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6209 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6210 emit_libcall_block (insns, base, rax, note);
6213 emit_insn (gen_tls_local_dynamic_base_32 (base));
6215 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6216 off = gen_rtx_CONST (Pmode, off);
6218 return gen_rtx_PLUS (Pmode, base, off);
6220 case TLS_MODEL_INITIAL_EXEC:
6224 type = UNSPEC_GOTNTPOFF;
6228 if (reload_in_progress)
6229 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6230 pic = pic_offset_table_rtx;
6231 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6233 else if (!TARGET_GNU_TLS)
6235 pic = gen_reg_rtx (Pmode);
6236 emit_insn (gen_set_got (pic));
6237 type = UNSPEC_GOTTPOFF;
6242 type = UNSPEC_INDNTPOFF;
6245 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6246 off = gen_rtx_CONST (Pmode, off);
6248 off = gen_rtx_PLUS (Pmode, pic, off);
6249 off = gen_rtx_MEM (Pmode, off);
6250 RTX_UNCHANGING_P (off) = 1;
6251 set_mem_alias_set (off, ix86_GOT_alias_set ());
6253 if (TARGET_64BIT || TARGET_GNU_TLS)
6255 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6256 off = force_reg (Pmode, off);
6257 return gen_rtx_PLUS (Pmode, base, off);
6261 base = get_thread_pointer (true);
6262 dest = gen_reg_rtx (Pmode);
6263 emit_insn (gen_subsi3 (dest, base, off));
6267 case TLS_MODEL_LOCAL_EXEC:
6268 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6269 (TARGET_64BIT || TARGET_GNU_TLS)
6270 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6271 off = gen_rtx_CONST (Pmode, off);
6273 if (TARGET_64BIT || TARGET_GNU_TLS)
6275 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6276 return gen_rtx_PLUS (Pmode, base, off);
6280 base = get_thread_pointer (true);
6281 dest = gen_reg_rtx (Pmode);
6282 emit_insn (gen_subsi3 (dest, base, off));
6293 /* Try machine-dependent ways of modifying an illegitimate address
6294 to be legitimate. If we find one, return the new, valid address.
6295 This macro is used in only one place: `memory_address' in explow.c.
6297 OLDX is the address as it was before break_out_memory_refs was called.
6298 In some cases it is useful to look at this to decide what needs to be done.
6300 MODE and WIN are passed so that this macro can use
6301 GO_IF_LEGITIMATE_ADDRESS.
6303 It is always safe for this macro to do nothing. It exists to recognize
6304 opportunities to optimize the output.
6306 For the 80386, we handle X+REG by loading X into a register R and
6307 using R+REG. R will go in a general reg and indexing will be used.
6308 However, if REG is a broken-out memory address or multiplication,
6309 nothing needs to be done because REG can certainly go in a general reg.
6311 When -fpic is used, special handling is needed for symbolic references.
6312 See comments by legitimize_pic_address in i386.c for details. */
6315 legitimize_address (register rtx x, register rtx oldx ATTRIBUTE_UNUSED,
6316 enum machine_mode mode)
6321 if (TARGET_DEBUG_ADDR)
6323 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6324 GET_MODE_NAME (mode));
6328 log = tls_symbolic_operand (x, mode);
6330 return legitimize_tls_address (x, log, false);
6332 if (flag_pic && SYMBOLIC_CONST (x))
6333 return legitimize_pic_address (x, 0);
6335 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6336 if (GET_CODE (x) == ASHIFT
6337 && GET_CODE (XEXP (x, 1)) == CONST_INT
6338 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6341 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6342 GEN_INT (1 << log));
6345 if (GET_CODE (x) == PLUS)
6347 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6349 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6350 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6351 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6354 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6355 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6356 GEN_INT (1 << log));
6359 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6360 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6361 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6364 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6365 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6366 GEN_INT (1 << log));
6369 /* Put multiply first if it isn't already. */
6370 if (GET_CODE (XEXP (x, 1)) == MULT)
6372 rtx tmp = XEXP (x, 0);
6373 XEXP (x, 0) = XEXP (x, 1);
6378 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6379 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6380 created by virtual register instantiation, register elimination, and
6381 similar optimizations. */
6382 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6385 x = gen_rtx_PLUS (Pmode,
6386 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6387 XEXP (XEXP (x, 1), 0)),
6388 XEXP (XEXP (x, 1), 1));
6392 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6393 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6394 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6395 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6396 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6397 && CONSTANT_P (XEXP (x, 1)))
6400 rtx other = NULL_RTX;
6402 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6404 constant = XEXP (x, 1);
6405 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6407 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6409 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6410 other = XEXP (x, 1);
6418 x = gen_rtx_PLUS (Pmode,
6419 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6420 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6421 plus_constant (other, INTVAL (constant)));
6425 if (changed && legitimate_address_p (mode, x, FALSE))
6428 if (GET_CODE (XEXP (x, 0)) == MULT)
6431 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6434 if (GET_CODE (XEXP (x, 1)) == MULT)
6437 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6441 && GET_CODE (XEXP (x, 1)) == REG
6442 && GET_CODE (XEXP (x, 0)) == REG)
6445 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6448 x = legitimize_pic_address (x, 0);
6451 if (changed && legitimate_address_p (mode, x, FALSE))
6454 if (GET_CODE (XEXP (x, 0)) == REG)
6456 register rtx temp = gen_reg_rtx (Pmode);
6457 register rtx val = force_operand (XEXP (x, 1), temp);
6459 emit_move_insn (temp, val);
6465 else if (GET_CODE (XEXP (x, 1)) == REG)
6467 register rtx temp = gen_reg_rtx (Pmode);
6468 register rtx val = force_operand (XEXP (x, 0), temp);
6470 emit_move_insn (temp, val);
6480 /* Print an integer constant expression in assembler syntax. Addition
6481 and subtraction are the only arithmetic that may appear in these
6482 expressions. FILE is the stdio stream to write to, X is the rtx, and
6483 CODE is the operand print code from the output string. */
6486 output_pic_addr_const (FILE *file, rtx x, int code)
6490 switch (GET_CODE (x))
6500 assemble_name (file, XSTR (x, 0));
6501 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6502 fputs ("@PLT", file);
6509 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6510 assemble_name (asm_out_file, buf);
6514 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6518 /* This used to output parentheses around the expression,
6519 but that does not work on the 386 (either ATT or BSD assembler). */
6520 output_pic_addr_const (file, XEXP (x, 0), code);
6524 if (GET_MODE (x) == VOIDmode)
6526 /* We can use %d if the number is <32 bits and positive. */
6527 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6528 fprintf (file, "0x%lx%08lx",
6529 (unsigned long) CONST_DOUBLE_HIGH (x),
6530 (unsigned long) CONST_DOUBLE_LOW (x));
6532 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6535 /* We can't handle floating point constants;
6536 PRINT_OPERAND must handle them. */
6537 output_operand_lossage ("floating constant misused");
6541 /* Some assemblers need integer constants to appear first. */
6542 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6544 output_pic_addr_const (file, XEXP (x, 0), code);
6546 output_pic_addr_const (file, XEXP (x, 1), code);
6548 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6550 output_pic_addr_const (file, XEXP (x, 1), code);
6552 output_pic_addr_const (file, XEXP (x, 0), code);
6560 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6561 output_pic_addr_const (file, XEXP (x, 0), code);
6563 output_pic_addr_const (file, XEXP (x, 1), code);
6565 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6569 if (XVECLEN (x, 0) != 1)
6571 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6572 switch (XINT (x, 1))
6575 fputs ("@GOT", file);
6578 fputs ("@GOTOFF", file);
6580 case UNSPEC_GOTPCREL:
6581 fputs ("@GOTPCREL(%rip)", file);
6583 case UNSPEC_GOTTPOFF:
6584 /* FIXME: This might be @TPOFF in Sun ld too. */
6585 fputs ("@GOTTPOFF", file);
6588 fputs ("@TPOFF", file);
6592 fputs ("@TPOFF", file);
6594 fputs ("@NTPOFF", file);
6597 fputs ("@DTPOFF", file);
6599 case UNSPEC_GOTNTPOFF:
6601 fputs ("@GOTTPOFF(%rip)", file);
6603 fputs ("@GOTNTPOFF", file);
6605 case UNSPEC_INDNTPOFF:
6606 fputs ("@INDNTPOFF", file);
6609 output_operand_lossage ("invalid UNSPEC as operand");
6615 output_operand_lossage ("invalid expression as operand");
6619 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6620 We need to handle our special PIC relocations. */
6623 i386_dwarf_output_addr_const (FILE *file, rtx x)
6626 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6630 fprintf (file, "%s", ASM_LONG);
6633 output_pic_addr_const (file, x, '\0');
6635 output_addr_const (file, x);
6639 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6640 We need to emit DTP-relative relocations. */
6643 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6645 fputs (ASM_LONG, file);
6646 output_addr_const (file, x);
6647 fputs ("@DTPOFF", file);
6653 fputs (", 0", file);
6660 /* In the name of slightly smaller debug output, and to cater to
6661 general assembler losage, recognize PIC+GOTOFF and turn it back
6662 into a direct symbol reference. */
6665 ix86_delegitimize_address (rtx orig_x)
6669 if (GET_CODE (x) == MEM)
6674 if (GET_CODE (x) != CONST
6675 || GET_CODE (XEXP (x, 0)) != UNSPEC
6676 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6677 || GET_CODE (orig_x) != MEM)
6679 return XVECEXP (XEXP (x, 0), 0, 0);
6682 if (GET_CODE (x) != PLUS
6683 || GET_CODE (XEXP (x, 1)) != CONST)
6686 if (GET_CODE (XEXP (x, 0)) == REG
6687 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6688 /* %ebx + GOT/GOTOFF */
6690 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6692 /* %ebx + %reg * scale + GOT/GOTOFF */
6694 if (GET_CODE (XEXP (y, 0)) == REG
6695 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6697 else if (GET_CODE (XEXP (y, 1)) == REG
6698 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6702 if (GET_CODE (y) != REG
6703 && GET_CODE (y) != MULT
6704 && GET_CODE (y) != ASHIFT)
6710 x = XEXP (XEXP (x, 1), 0);
6711 if (GET_CODE (x) == UNSPEC
6712 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6713 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6716 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6717 return XVECEXP (x, 0, 0);
6720 if (GET_CODE (x) == PLUS
6721 && GET_CODE (XEXP (x, 0)) == UNSPEC
6722 && GET_CODE (XEXP (x, 1)) == CONST_INT
6723 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6724 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6725 && GET_CODE (orig_x) != MEM)))
6727 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6729 return gen_rtx_PLUS (Pmode, y, x);
6737 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6742 if (mode == CCFPmode || mode == CCFPUmode)
6744 enum rtx_code second_code, bypass_code;
6745 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6746 if (bypass_code != NIL || second_code != NIL)
6748 code = ix86_fp_compare_code_to_integer (code);
6752 code = reverse_condition (code);
6763 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6768 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6769 Those same assemblers have the same but opposite losage on cmov. */
6772 suffix = fp ? "nbe" : "a";
6775 if (mode == CCNOmode || mode == CCGOCmode)
6777 else if (mode == CCmode || mode == CCGCmode)
6788 if (mode == CCNOmode || mode == CCGOCmode)
6790 else if (mode == CCmode || mode == CCGCmode)
6799 suffix = fp ? "nb" : "ae";
6802 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6812 suffix = fp ? "u" : "p";
6815 suffix = fp ? "nu" : "np";
6820 fputs (suffix, file);
6824 print_reg (rtx x, int code, FILE *file)
6826 if (REGNO (x) == ARG_POINTER_REGNUM
6827 || REGNO (x) == FRAME_POINTER_REGNUM
6828 || REGNO (x) == FLAGS_REG
6829 || REGNO (x) == FPSR_REG)
6832 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6835 if (code == 'w' || MMX_REG_P (x))
6837 else if (code == 'b')
6839 else if (code == 'k')
6841 else if (code == 'q')
6843 else if (code == 'y')
6845 else if (code == 'h')
6848 code = GET_MODE_SIZE (GET_MODE (x));
6850 /* Irritatingly, AMD extended registers use different naming convention
6851 from the normal registers. */
6852 if (REX_INT_REG_P (x))
6859 error ("extended registers have no high halves");
6862 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6865 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6868 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6871 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6874 error ("unsupported operand size for extended register");
6882 if (STACK_TOP_P (x))
6884 fputs ("st(0)", file);
6891 if (! ANY_FP_REG_P (x))
6892 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6896 fputs (hi_reg_name[REGNO (x)], file);
6899 fputs (qi_reg_name[REGNO (x)], file);
6902 fputs (qi_high_reg_name[REGNO (x)], file);
6909 /* Locate some local-dynamic symbol still in use by this function
6910 so that we can print its name in some tls_local_dynamic_base
6914 get_some_local_dynamic_name (void)
6918 if (cfun->machine->some_ld_name)
6919 return cfun->machine->some_ld_name;
6921 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6923 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6924 return cfun->machine->some_ld_name;
6930 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6934 if (GET_CODE (x) == SYMBOL_REF
6935 && local_dynamic_symbolic_operand (x, Pmode))
6937 cfun->machine->some_ld_name = XSTR (x, 0);
6945 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6946 C -- print opcode suffix for set/cmov insn.
6947 c -- like C, but print reversed condition
6948 F,f -- likewise, but for floating-point.
6949 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6951 R -- print the prefix for register names.
6952 z -- print the opcode suffix for the size of the current operand.
6953 * -- print a star (in certain assembler syntax)
6954 A -- print an absolute memory reference.
6955 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6956 s -- print a shift double count, followed by the assemblers argument
6958 b -- print the QImode name of the register for the indicated operand.
6959 %b0 would print %al if operands[0] is reg 0.
6960 w -- likewise, print the HImode name of the register.
6961 k -- likewise, print the SImode name of the register.
6962 q -- likewise, print the DImode name of the register.
6963 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6964 y -- print "st(0)" instead of "st" as a register.
6965 D -- print condition for SSE cmp instruction.
6966 P -- if PIC, print an @PLT suffix.
6967 X -- don't print any sort of PIC '@' suffix for a symbol.
6968 & -- print some in-use local-dynamic symbol name.
6972 print_operand (FILE *file, rtx x, int code)
6979 if (ASSEMBLER_DIALECT == ASM_ATT)
6984 assemble_name (file, get_some_local_dynamic_name ());
6988 if (ASSEMBLER_DIALECT == ASM_ATT)
6990 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6992 /* Intel syntax. For absolute addresses, registers should not
6993 be surrounded by braces. */
6994 if (GET_CODE (x) != REG)
6997 PRINT_OPERAND (file, x, 0);
7005 PRINT_OPERAND (file, x, 0);
7010 if (ASSEMBLER_DIALECT == ASM_ATT)
7015 if (ASSEMBLER_DIALECT == ASM_ATT)
7020 if (ASSEMBLER_DIALECT == ASM_ATT)
7025 if (ASSEMBLER_DIALECT == ASM_ATT)
7030 if (ASSEMBLER_DIALECT == ASM_ATT)
7035 if (ASSEMBLER_DIALECT == ASM_ATT)
7040 /* 387 opcodes don't get size suffixes if the operands are
7042 if (STACK_REG_P (x))
7045 /* Likewise if using Intel opcodes. */
7046 if (ASSEMBLER_DIALECT == ASM_INTEL)
7049 /* This is the size of op from size of operand. */
7050 switch (GET_MODE_SIZE (GET_MODE (x)))
7053 #ifdef HAVE_GAS_FILDS_FISTS
7059 if (GET_MODE (x) == SFmode)
7074 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7076 #ifdef GAS_MNEMONICS
7102 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7104 PRINT_OPERAND (file, x, 0);
7110 /* Little bit of braindamage here. The SSE compare instructions
7111 does use completely different names for the comparisons that the
7112 fp conditional moves. */
7113 switch (GET_CODE (x))
7128 fputs ("unord", file);
7132 fputs ("neq", file);
7136 fputs ("nlt", file);
7140 fputs ("nle", file);
7143 fputs ("ord", file);
7151 #ifdef CMOV_SUN_AS_SYNTAX
7152 if (ASSEMBLER_DIALECT == ASM_ATT)
7154 switch (GET_MODE (x))
7156 case HImode: putc ('w', file); break;
7158 case SFmode: putc ('l', file); break;
7160 case DFmode: putc ('q', file); break;
7168 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7171 #ifdef CMOV_SUN_AS_SYNTAX
7172 if (ASSEMBLER_DIALECT == ASM_ATT)
7175 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7178 /* Like above, but reverse condition */
7180 /* Check to see if argument to %c is really a constant
7181 and not a condition code which needs to be reversed. */
7182 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7184 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7187 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7190 #ifdef CMOV_SUN_AS_SYNTAX
7191 if (ASSEMBLER_DIALECT == ASM_ATT)
7194 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7200 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7203 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7206 int pred_val = INTVAL (XEXP (x, 0));
7208 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7209 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7211 int taken = pred_val > REG_BR_PROB_BASE / 2;
7212 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7214 /* Emit hints only in the case default branch prediction
7215 heuristics would fail. */
7216 if (taken != cputaken)
7218 /* We use 3e (DS) prefix for taken branches and
7219 2e (CS) prefix for not taken branches. */
7221 fputs ("ds ; ", file);
7223 fputs ("cs ; ", file);
7230 output_operand_lossage ("invalid operand code `%c'", code);
7234 if (GET_CODE (x) == REG)
7236 PRINT_REG (x, code, file);
7239 else if (GET_CODE (x) == MEM)
7241 /* No `byte ptr' prefix for call instructions. */
7242 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7245 switch (GET_MODE_SIZE (GET_MODE (x)))
7247 case 1: size = "BYTE"; break;
7248 case 2: size = "WORD"; break;
7249 case 4: size = "DWORD"; break;
7250 case 8: size = "QWORD"; break;
7251 case 12: size = "XWORD"; break;
7252 case 16: size = "XMMWORD"; break;
7257 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7260 else if (code == 'w')
7262 else if (code == 'k')
7266 fputs (" PTR ", file);
7270 /* Avoid (%rip) for call operands. */
7271 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7272 && GET_CODE (x) != CONST_INT)
7273 output_addr_const (file, x);
7274 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7275 output_operand_lossage ("invalid constraints for operand");
7280 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7285 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7286 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7288 if (ASSEMBLER_DIALECT == ASM_ATT)
7290 fprintf (file, "0x%lx", l);
7293 /* These float cases don't actually occur as immediate operands. */
7294 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7298 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7299 fprintf (file, "%s", dstr);
7302 else if (GET_CODE (x) == CONST_DOUBLE
7303 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7307 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7308 fprintf (file, "%s", dstr);
7315 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7317 if (ASSEMBLER_DIALECT == ASM_ATT)
7320 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7321 || GET_CODE (x) == LABEL_REF)
7323 if (ASSEMBLER_DIALECT == ASM_ATT)
7326 fputs ("OFFSET FLAT:", file);
7329 if (GET_CODE (x) == CONST_INT)
7330 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7332 output_pic_addr_const (file, x, code);
7334 output_addr_const (file, x);
7338 /* Print a memory operand whose address is ADDR. */
7341 print_operand_address (FILE *file, register rtx addr)
7343 struct ix86_address parts;
7344 rtx base, index, disp;
7347 if (! ix86_decompose_address (addr, &parts))
7351 index = parts.index;
7353 scale = parts.scale;
7361 if (USER_LABEL_PREFIX[0] == 0)
7363 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7369 if (!base && !index)
7371 /* Displacement only requires special attention. */
7373 if (GET_CODE (disp) == CONST_INT)
7375 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7377 if (USER_LABEL_PREFIX[0] == 0)
7379 fputs ("ds:", file);
7381 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7384 output_pic_addr_const (file, disp, 0);
7386 output_addr_const (file, disp);
7388 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7390 && ((GET_CODE (disp) == SYMBOL_REF
7391 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7392 || GET_CODE (disp) == LABEL_REF
7393 || (GET_CODE (disp) == CONST
7394 && GET_CODE (XEXP (disp, 0)) == PLUS
7395 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7396 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7397 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7398 fputs ("(%rip)", file);
7402 if (ASSEMBLER_DIALECT == ASM_ATT)
7407 output_pic_addr_const (file, disp, 0);
7408 else if (GET_CODE (disp) == LABEL_REF)
7409 output_asm_label (disp);
7411 output_addr_const (file, disp);
7416 PRINT_REG (base, 0, file);
7420 PRINT_REG (index, 0, file);
7422 fprintf (file, ",%d", scale);
7428 rtx offset = NULL_RTX;
7432 /* Pull out the offset of a symbol; print any symbol itself. */
7433 if (GET_CODE (disp) == CONST
7434 && GET_CODE (XEXP (disp, 0)) == PLUS
7435 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7437 offset = XEXP (XEXP (disp, 0), 1);
7438 disp = gen_rtx_CONST (VOIDmode,
7439 XEXP (XEXP (disp, 0), 0));
7443 output_pic_addr_const (file, disp, 0);
7444 else if (GET_CODE (disp) == LABEL_REF)
7445 output_asm_label (disp);
7446 else if (GET_CODE (disp) == CONST_INT)
7449 output_addr_const (file, disp);
7455 PRINT_REG (base, 0, file);
7458 if (INTVAL (offset) >= 0)
7460 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7464 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7471 PRINT_REG (index, 0, file);
7473 fprintf (file, "*%d", scale);
7481 output_addr_const_extra (FILE *file, rtx x)
7485 if (GET_CODE (x) != UNSPEC)
7488 op = XVECEXP (x, 0, 0);
7489 switch (XINT (x, 1))
7491 case UNSPEC_GOTTPOFF:
7492 output_addr_const (file, op);
7493 /* FIXME: This might be @TPOFF in Sun ld. */
7494 fputs ("@GOTTPOFF", file);
7497 output_addr_const (file, op);
7498 fputs ("@TPOFF", file);
7501 output_addr_const (file, op);
7503 fputs ("@TPOFF", file);
7505 fputs ("@NTPOFF", file);
7508 output_addr_const (file, op);
7509 fputs ("@DTPOFF", file);
7511 case UNSPEC_GOTNTPOFF:
7512 output_addr_const (file, op);
7514 fputs ("@GOTTPOFF(%rip)", file);
7516 fputs ("@GOTNTPOFF", file);
7518 case UNSPEC_INDNTPOFF:
7519 output_addr_const (file, op);
7520 fputs ("@INDNTPOFF", file);
7530 /* Split one or more DImode RTL references into pairs of SImode
7531 references. The RTL can be REG, offsettable MEM, integer constant, or
7532 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7533 split and "num" is its length. lo_half and hi_half are output arrays
7534 that parallel "operands". */
7537 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7541 rtx op = operands[num];
7543 /* simplify_subreg refuse to split volatile memory addresses,
7544 but we still have to handle it. */
7545 if (GET_CODE (op) == MEM)
7547 lo_half[num] = adjust_address (op, SImode, 0);
7548 hi_half[num] = adjust_address (op, SImode, 4);
7552 lo_half[num] = simplify_gen_subreg (SImode, op,
7553 GET_MODE (op) == VOIDmode
7554 ? DImode : GET_MODE (op), 0);
7555 hi_half[num] = simplify_gen_subreg (SImode, op,
7556 GET_MODE (op) == VOIDmode
7557 ? DImode : GET_MODE (op), 4);
7561 /* Split one or more TImode RTL references into pairs of SImode
7562 references. The RTL can be REG, offsettable MEM, integer constant, or
7563 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7564 split and "num" is its length. lo_half and hi_half are output arrays
7565 that parallel "operands". */
7568 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7572 rtx op = operands[num];
7574 /* simplify_subreg refuse to split volatile memory addresses, but we
7575 still have to handle it. */
7576 if (GET_CODE (op) == MEM)
7578 lo_half[num] = adjust_address (op, DImode, 0);
7579 hi_half[num] = adjust_address (op, DImode, 8);
7583 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7584 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7589 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7590 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7591 is the expression of the binary operation. The output may either be
7592 emitted here, or returned to the caller, like all output_* functions.
7594 There is no guarantee that the operands are the same mode, as they
7595 might be within FLOAT or FLOAT_EXTEND expressions. */
7597 #ifndef SYSV386_COMPAT
7598 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7599 wants to fix the assemblers because that causes incompatibility
7600 with gcc. No-one wants to fix gcc because that causes
7601 incompatibility with assemblers... You can use the option of
7602 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7603 #define SYSV386_COMPAT 1
7607 output_387_binary_op (rtx insn, rtx *operands)
7609 static char buf[30];
7612 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7614 #ifdef ENABLE_CHECKING
7615 /* Even if we do not want to check the inputs, this documents input
7616 constraints. Which helps in understanding the following code. */
7617 if (STACK_REG_P (operands[0])
7618 && ((REG_P (operands[1])
7619 && REGNO (operands[0]) == REGNO (operands[1])
7620 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7621 || (REG_P (operands[2])
7622 && REGNO (operands[0]) == REGNO (operands[2])
7623 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7624 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7630 switch (GET_CODE (operands[3]))
7633 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7634 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7642 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7643 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7651 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7652 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7660 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7661 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7675 if (GET_MODE (operands[0]) == SFmode)
7676 strcat (buf, "ss\t{%2, %0|%0, %2}");
7678 strcat (buf, "sd\t{%2, %0|%0, %2}");
7683 switch (GET_CODE (operands[3]))
7687 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7689 rtx temp = operands[2];
7690 operands[2] = operands[1];
7694 /* know operands[0] == operands[1]. */
7696 if (GET_CODE (operands[2]) == MEM)
7702 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7704 if (STACK_TOP_P (operands[0]))
7705 /* How is it that we are storing to a dead operand[2]?
7706 Well, presumably operands[1] is dead too. We can't
7707 store the result to st(0) as st(0) gets popped on this
7708 instruction. Instead store to operands[2] (which I
7709 think has to be st(1)). st(1) will be popped later.
7710 gcc <= 2.8.1 didn't have this check and generated
7711 assembly code that the Unixware assembler rejected. */
7712 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7714 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7718 if (STACK_TOP_P (operands[0]))
7719 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7721 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7726 if (GET_CODE (operands[1]) == MEM)
7732 if (GET_CODE (operands[2]) == MEM)
7738 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7741 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7742 derived assemblers, confusingly reverse the direction of
7743 the operation for fsub{r} and fdiv{r} when the
7744 destination register is not st(0). The Intel assembler
7745 doesn't have this brain damage. Read !SYSV386_COMPAT to
7746 figure out what the hardware really does. */
7747 if (STACK_TOP_P (operands[0]))
7748 p = "{p\t%0, %2|rp\t%2, %0}";
7750 p = "{rp\t%2, %0|p\t%0, %2}";
7752 if (STACK_TOP_P (operands[0]))
7753 /* As above for fmul/fadd, we can't store to st(0). */
7754 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7756 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7761 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7764 if (STACK_TOP_P (operands[0]))
7765 p = "{rp\t%0, %1|p\t%1, %0}";
7767 p = "{p\t%1, %0|rp\t%0, %1}";
7769 if (STACK_TOP_P (operands[0]))
7770 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7772 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7777 if (STACK_TOP_P (operands[0]))
7779 if (STACK_TOP_P (operands[1]))
7780 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7782 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7785 else if (STACK_TOP_P (operands[1]))
7788 p = "{\t%1, %0|r\t%0, %1}";
7790 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7796 p = "{r\t%2, %0|\t%0, %2}";
7798 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7811 /* Output code to initialize control word copies used by
7812 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7813 is set to control word rounding downwards. */
7815 emit_i387_cw_initialization (rtx normal, rtx round_down)
7817 rtx reg = gen_reg_rtx (HImode);
7819 emit_insn (gen_x86_fnstcw_1 (normal));
7820 emit_move_insn (reg, normal);
7821 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7823 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7825 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7826 emit_move_insn (round_down, reg);
7829 /* Output code for INSN to convert a float to a signed int. OPERANDS
7830 are the insn operands. The output may be [HSD]Imode and the input
7831 operand may be [SDX]Fmode. */
7834 output_fix_trunc (rtx insn, rtx *operands)
7836 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7837 int dimode_p = GET_MODE (operands[0]) == DImode;
7839 /* Jump through a hoop or two for DImode, since the hardware has no
7840 non-popping instruction. We used to do this a different way, but
7841 that was somewhat fragile and broke with post-reload splitters. */
7842 if (dimode_p && !stack_top_dies)
7843 output_asm_insn ("fld\t%y1", operands);
7845 if (!STACK_TOP_P (operands[1]))
7848 if (GET_CODE (operands[0]) != MEM)
7851 output_asm_insn ("fldcw\t%3", operands);
7852 if (stack_top_dies || dimode_p)
7853 output_asm_insn ("fistp%z0\t%0", operands);
7855 output_asm_insn ("fist%z0\t%0", operands);
7856 output_asm_insn ("fldcw\t%2", operands);
7861 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7862 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7863 when fucom should be used. */
7866 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7869 rtx cmp_op0 = operands[0];
7870 rtx cmp_op1 = operands[1];
7871 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7876 cmp_op1 = operands[2];
7880 if (GET_MODE (operands[0]) == SFmode)
7882 return "ucomiss\t{%1, %0|%0, %1}";
7884 return "comiss\t{%1, %0|%0, %1}";
7887 return "ucomisd\t{%1, %0|%0, %1}";
7889 return "comisd\t{%1, %0|%0, %1}";
7892 if (! STACK_TOP_P (cmp_op0))
7895 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7897 if (STACK_REG_P (cmp_op1)
7899 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7900 && REGNO (cmp_op1) != FIRST_STACK_REG)
7902 /* If both the top of the 387 stack dies, and the other operand
7903 is also a stack register that dies, then this must be a
7904 `fcompp' float compare */
7908 /* There is no double popping fcomi variant. Fortunately,
7909 eflags is immune from the fstp's cc clobbering. */
7911 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7913 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7921 return "fucompp\n\tfnstsw\t%0";
7923 return "fcompp\n\tfnstsw\t%0";
7936 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7938 static const char * const alt[24] =
7950 "fcomi\t{%y1, %0|%0, %y1}",
7951 "fcomip\t{%y1, %0|%0, %y1}",
7952 "fucomi\t{%y1, %0|%0, %y1}",
7953 "fucomip\t{%y1, %0|%0, %y1}",
7960 "fcom%z2\t%y2\n\tfnstsw\t%0",
7961 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7962 "fucom%z2\t%y2\n\tfnstsw\t%0",
7963 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7965 "ficom%z2\t%y2\n\tfnstsw\t%0",
7966 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7974 mask = eflags_p << 3;
7975 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7976 mask |= unordered_p << 1;
7977 mask |= stack_top_dies;
7990 ix86_output_addr_vec_elt (FILE *file, int value)
7992 const char *directive = ASM_LONG;
7997 directive = ASM_QUAD;
8003 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8007 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8010 fprintf (file, "%s%s%d-%s%d\n",
8011 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8012 else if (HAVE_AS_GOTOFF_IN_DATA)
8013 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8015 else if (TARGET_MACHO)
8016 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8017 machopic_function_base_name () + 1);
8020 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8021 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8024 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8028 ix86_expand_clear (rtx dest)
8032 /* We play register width games, which are only valid after reload. */
8033 if (!reload_completed)
8036 /* Avoid HImode and its attendant prefix byte. */
8037 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8038 dest = gen_rtx_REG (SImode, REGNO (dest));
8040 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8042 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8043 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8045 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8046 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8052 /* X is an unchanging MEM. If it is a constant pool reference, return
8053 the constant pool rtx, else NULL. */
8056 maybe_get_pool_constant (rtx x)
8058 x = ix86_delegitimize_address (XEXP (x, 0));
8060 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8061 return get_pool_constant (x);
8067 ix86_expand_move (enum machine_mode mode, rtx operands[])
8069 int strict = (reload_in_progress || reload_completed);
8071 enum tls_model model;
8076 model = tls_symbolic_operand (op1, Pmode);
8079 op1 = legitimize_tls_address (op1, model, true);
8080 op1 = force_operand (op1, op0);
8085 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8090 rtx temp = ((reload_in_progress
8091 || ((op0 && GET_CODE (op0) == REG)
8093 ? op0 : gen_reg_rtx (Pmode));
8094 op1 = machopic_indirect_data_reference (op1, temp);
8095 op1 = machopic_legitimize_pic_address (op1, mode,
8096 temp == op1 ? 0 : temp);
8098 else if (MACHOPIC_INDIRECT)
8099 op1 = machopic_indirect_data_reference (op1, 0);
8103 if (GET_CODE (op0) == MEM)
8104 op1 = force_reg (Pmode, op1);
8108 if (GET_CODE (temp) != REG)
8109 temp = gen_reg_rtx (Pmode);
8110 temp = legitimize_pic_address (op1, temp);
8115 #endif /* TARGET_MACHO */
8119 if (GET_CODE (op0) == MEM
8120 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8121 || !push_operand (op0, mode))
8122 && GET_CODE (op1) == MEM)
8123 op1 = force_reg (mode, op1);
8125 if (push_operand (op0, mode)
8126 && ! general_no_elim_operand (op1, mode))
8127 op1 = copy_to_mode_reg (mode, op1);
8129 /* Force large constants in 64bit compilation into register
8130 to get them CSEed. */
8131 if (TARGET_64BIT && mode == DImode
8132 && immediate_operand (op1, mode)
8133 && !x86_64_zero_extended_value (op1)
8134 && !register_operand (op0, mode)
8135 && optimize && !reload_completed && !reload_in_progress)
8136 op1 = copy_to_mode_reg (mode, op1);
8138 if (FLOAT_MODE_P (mode))
8140 /* If we are loading a floating point constant to a register,
8141 force the value to memory now, since we'll get better code
8142 out the back end. */
8146 else if (GET_CODE (op1) == CONST_DOUBLE)
8148 op1 = validize_mem (force_const_mem (mode, op1));
8149 if (!register_operand (op0, mode))
8151 rtx temp = gen_reg_rtx (mode);
8152 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8153 emit_move_insn (op0, temp);
8160 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8164 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8166 /* Force constants other than zero into memory. We do not know how
8167 the instructions used to build constants modify the upper 64 bits
8168 of the register, once we have that information we may be able
8169 to handle some of them more efficiently. */
8170 if ((reload_in_progress | reload_completed) == 0
8171 && register_operand (operands[0], mode)
8172 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8173 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8175 /* Make operand1 a register if it isn't already. */
8177 && !register_operand (operands[0], mode)
8178 && !register_operand (operands[1], mode))
8180 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8181 emit_move_insn (operands[0], temp);
8185 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8188 /* Attempt to expand a binary operator. Make the expansion closer to the
8189 actual machine, then just general_operand, which will allow 3 separate
8190 memory references (one output, two input) in a single insn. */
8193 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8196 int matching_memory;
8197 rtx src1, src2, dst, op, clob;
8203 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8204 if (GET_RTX_CLASS (code) == 'c'
8205 && (rtx_equal_p (dst, src2)
8206 || immediate_operand (src1, mode)))
8213 /* If the destination is memory, and we do not have matching source
8214 operands, do things in registers. */
8215 matching_memory = 0;
8216 if (GET_CODE (dst) == MEM)
8218 if (rtx_equal_p (dst, src1))
8219 matching_memory = 1;
8220 else if (GET_RTX_CLASS (code) == 'c'
8221 && rtx_equal_p (dst, src2))
8222 matching_memory = 2;
8224 dst = gen_reg_rtx (mode);
8227 /* Both source operands cannot be in memory. */
8228 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8230 if (matching_memory != 2)
8231 src2 = force_reg (mode, src2);
8233 src1 = force_reg (mode, src1);
8236 /* If the operation is not commutable, source 1 cannot be a constant
8237 or non-matching memory. */
8238 if ((CONSTANT_P (src1)
8239 || (!matching_memory && GET_CODE (src1) == MEM))
8240 && GET_RTX_CLASS (code) != 'c')
8241 src1 = force_reg (mode, src1);
8243 /* If optimizing, copy to regs to improve CSE */
8244 if (optimize && ! no_new_pseudos)
8246 if (GET_CODE (dst) == MEM)
8247 dst = gen_reg_rtx (mode);
8248 if (GET_CODE (src1) == MEM)
8249 src1 = force_reg (mode, src1);
8250 if (GET_CODE (src2) == MEM)
8251 src2 = force_reg (mode, src2);
8254 /* Emit the instruction. */
8256 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8257 if (reload_in_progress)
8259 /* Reload doesn't know about the flags register, and doesn't know that
8260 it doesn't want to clobber it. We can only do this with PLUS. */
8267 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8268 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8271 /* Fix up the destination if needed. */
8272 if (dst != operands[0])
8273 emit_move_insn (operands[0], dst);
8276 /* Return TRUE or FALSE depending on whether the binary operator meets the
8277 appropriate constraints. */
8280 ix86_binary_operator_ok (enum rtx_code code,
8281 enum machine_mode mode ATTRIBUTE_UNUSED,
8284 /* Both source operands cannot be in memory. */
8285 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8287 /* If the operation is not commutable, source 1 cannot be a constant. */
8288 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8290 /* If the destination is memory, we must have a matching source operand. */
8291 if (GET_CODE (operands[0]) == MEM
8292 && ! (rtx_equal_p (operands[0], operands[1])
8293 || (GET_RTX_CLASS (code) == 'c'
8294 && rtx_equal_p (operands[0], operands[2]))))
8296 /* If the operation is not commutable and the source 1 is memory, we must
8297 have a matching destination. */
8298 if (GET_CODE (operands[1]) == MEM
8299 && GET_RTX_CLASS (code) != 'c'
8300 && ! rtx_equal_p (operands[0], operands[1]))
8305 /* Attempt to expand a unary operator. Make the expansion closer to the
8306 actual machine, then just general_operand, which will allow 2 separate
8307 memory references (one output, one input) in a single insn. */
8310 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8313 int matching_memory;
8314 rtx src, dst, op, clob;
8319 /* If the destination is memory, and we do not have matching source
8320 operands, do things in registers. */
8321 matching_memory = 0;
8322 if (GET_CODE (dst) == MEM)
8324 if (rtx_equal_p (dst, src))
8325 matching_memory = 1;
8327 dst = gen_reg_rtx (mode);
8330 /* When source operand is memory, destination must match. */
8331 if (!matching_memory && GET_CODE (src) == MEM)
8332 src = force_reg (mode, src);
8334 /* If optimizing, copy to regs to improve CSE */
8335 if (optimize && ! no_new_pseudos)
8337 if (GET_CODE (dst) == MEM)
8338 dst = gen_reg_rtx (mode);
8339 if (GET_CODE (src) == MEM)
8340 src = force_reg (mode, src);
8343 /* Emit the instruction. */
8345 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8346 if (reload_in_progress || code == NOT)
8348 /* Reload doesn't know about the flags register, and doesn't know that
8349 it doesn't want to clobber it. */
8356 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8357 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8360 /* Fix up the destination if needed. */
8361 if (dst != operands[0])
8362 emit_move_insn (operands[0], dst);
8365 /* Return TRUE or FALSE depending on whether the unary operator meets the
8366 appropriate constraints. */
8369 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8370 enum machine_mode mode ATTRIBUTE_UNUSED,
8371 rtx operands[2] ATTRIBUTE_UNUSED)
8373 /* If one of operands is memory, source and destination must match. */
8374 if ((GET_CODE (operands[0]) == MEM
8375 || GET_CODE (operands[1]) == MEM)
8376 && ! rtx_equal_p (operands[0], operands[1]))
8381 /* Return TRUE or FALSE depending on whether the first SET in INSN
8382 has source and destination with matching CC modes, and that the
8383 CC mode is at least as constrained as REQ_MODE. */
8386 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8389 enum machine_mode set_mode;
8391 set = PATTERN (insn);
8392 if (GET_CODE (set) == PARALLEL)
8393 set = XVECEXP (set, 0, 0);
8394 if (GET_CODE (set) != SET)
8396 if (GET_CODE (SET_SRC (set)) != COMPARE)
8399 set_mode = GET_MODE (SET_DEST (set));
8403 if (req_mode != CCNOmode
8404 && (req_mode != CCmode
8405 || XEXP (SET_SRC (set), 1) != const0_rtx))
8409 if (req_mode == CCGCmode)
8413 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8417 if (req_mode == CCZmode)
8427 return (GET_MODE (SET_SRC (set)) == set_mode);
8430 /* Generate insn patterns to do an integer compare of OPERANDS. */
8433 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8435 enum machine_mode cmpmode;
8438 cmpmode = SELECT_CC_MODE (code, op0, op1);
8439 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8441 /* This is very simple, but making the interface the same as in the
8442 FP case makes the rest of the code easier. */
8443 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8444 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8446 /* Return the test that should be put into the flags user, i.e.
8447 the bcc, scc, or cmov instruction. */
8448 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8451 /* Figure out whether to use ordered or unordered fp comparisons.
8452 Return the appropriate mode to use. */
8455 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8457 /* ??? In order to make all comparisons reversible, we do all comparisons
8458 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8459 all forms trapping and nontrapping comparisons, we can make inequality
8460 comparisons trapping again, since it results in better code when using
8461 FCOM based compares. */
8462 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8466 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8468 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8469 return ix86_fp_compare_mode (code);
8472 /* Only zero flag is needed. */
8474 case NE: /* ZF!=0 */
8476 /* Codes needing carry flag. */
8477 case GEU: /* CF=0 */
8478 case GTU: /* CF=0 & ZF=0 */
8479 case LTU: /* CF=1 */
8480 case LEU: /* CF=1 | ZF=1 */
8482 /* Codes possibly doable only with sign flag when
8483 comparing against zero. */
8484 case GE: /* SF=OF or SF=0 */
8485 case LT: /* SF<>OF or SF=1 */
8486 if (op1 == const0_rtx)
8489 /* For other cases Carry flag is not required. */
8491 /* Codes doable only with sign flag when comparing
8492 against zero, but we miss jump instruction for it
8493 so we need to use relational tests against overflow
8494 that thus needs to be zero. */
8495 case GT: /* ZF=0 & SF=OF */
8496 case LE: /* ZF=1 | SF<>OF */
8497 if (op1 == const0_rtx)
8501 /* strcmp pattern do (use flags) and combine may ask us for proper
8510 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8513 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8515 enum rtx_code swapped_code = swap_condition (code);
8516 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8517 || (ix86_fp_comparison_cost (swapped_code)
8518 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8521 /* Swap, force into registers, or otherwise massage the two operands
8522 to a fp comparison. The operands are updated in place; the new
8523 comparison code is returned. */
8525 static enum rtx_code
8526 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8528 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8529 rtx op0 = *pop0, op1 = *pop1;
8530 enum machine_mode op_mode = GET_MODE (op0);
8531 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8533 /* All of the unordered compare instructions only work on registers.
8534 The same is true of the XFmode compare instructions. The same is
8535 true of the fcomi compare instructions. */
8538 && (fpcmp_mode == CCFPUmode
8539 || op_mode == XFmode
8540 || op_mode == TFmode
8541 || ix86_use_fcomi_compare (code)))
8543 op0 = force_reg (op_mode, op0);
8544 op1 = force_reg (op_mode, op1);
8548 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8549 things around if they appear profitable, otherwise force op0
8552 if (standard_80387_constant_p (op0) == 0
8553 || (GET_CODE (op0) == MEM
8554 && ! (standard_80387_constant_p (op1) == 0
8555 || GET_CODE (op1) == MEM)))
8558 tmp = op0, op0 = op1, op1 = tmp;
8559 code = swap_condition (code);
8562 if (GET_CODE (op0) != REG)
8563 op0 = force_reg (op_mode, op0);
8565 if (CONSTANT_P (op1))
8567 if (standard_80387_constant_p (op1))
8568 op1 = force_reg (op_mode, op1);
8570 op1 = validize_mem (force_const_mem (op_mode, op1));
8574 /* Try to rearrange the comparison to make it cheaper. */
8575 if (ix86_fp_comparison_cost (code)
8576 > ix86_fp_comparison_cost (swap_condition (code))
8577 && (GET_CODE (op1) == REG || !no_new_pseudos))
8580 tmp = op0, op0 = op1, op1 = tmp;
8581 code = swap_condition (code);
8582 if (GET_CODE (op0) != REG)
8583 op0 = force_reg (op_mode, op0);
8591 /* Convert comparison codes we use to represent FP comparison to integer
8592 code that will result in proper branch. Return UNKNOWN if no such code
8594 static enum rtx_code
8595 ix86_fp_compare_code_to_integer (enum rtx_code code)
8624 /* Split comparison code CODE into comparisons we can do using branch
8625 instructions. BYPASS_CODE is comparison code for branch that will
8626 branch around FIRST_CODE and SECOND_CODE. If some of branches
8627 is not required, set value to NIL.
8628 We never require more than two branches. */
8630 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8631 enum rtx_code *first_code,
8632 enum rtx_code *second_code)
8638 /* The fcomi comparison sets flags as follows:
8648 case GT: /* GTU - CF=0 & ZF=0 */
8649 case GE: /* GEU - CF=0 */
8650 case ORDERED: /* PF=0 */
8651 case UNORDERED: /* PF=1 */
8652 case UNEQ: /* EQ - ZF=1 */
8653 case UNLT: /* LTU - CF=1 */
8654 case UNLE: /* LEU - CF=1 | ZF=1 */
8655 case LTGT: /* EQ - ZF=0 */
8657 case LT: /* LTU - CF=1 - fails on unordered */
8659 *bypass_code = UNORDERED;
8661 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8663 *bypass_code = UNORDERED;
8665 case EQ: /* EQ - ZF=1 - fails on unordered */
8667 *bypass_code = UNORDERED;
8669 case NE: /* NE - ZF=0 - fails on unordered */
8671 *second_code = UNORDERED;
8673 case UNGE: /* GEU - CF=0 - fails on unordered */
8675 *second_code = UNORDERED;
8677 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8679 *second_code = UNORDERED;
8684 if (!TARGET_IEEE_FP)
8691 /* Return cost of comparison done fcom + arithmetics operations on AX.
8692 All following functions do use number of instructions as a cost metrics.
8693 In future this should be tweaked to compute bytes for optimize_size and
8694 take into account performance of various instructions on various CPUs. */
8696 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8698 if (!TARGET_IEEE_FP)
8700 /* The cost of code output by ix86_expand_fp_compare. */
8728 /* Return cost of comparison done using fcomi operation.
8729 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8731 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8733 enum rtx_code bypass_code, first_code, second_code;
8734 /* Return arbitrarily high cost when instruction is not supported - this
8735 prevents gcc from using it. */
8738 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8739 return (bypass_code != NIL || second_code != NIL) + 2;
8742 /* Return cost of comparison done using sahf operation.
8743 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8745 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8747 enum rtx_code bypass_code, first_code, second_code;
8748 /* Return arbitrarily high cost when instruction is not preferred - this
8749 avoids gcc from using it. */
8750 if (!TARGET_USE_SAHF && !optimize_size)
8752 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8753 return (bypass_code != NIL || second_code != NIL) + 3;
8756 /* Compute cost of the comparison done using any method.
8757 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8759 ix86_fp_comparison_cost (enum rtx_code code)
8761 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8764 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8765 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8767 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8768 if (min > sahf_cost)
8770 if (min > fcomi_cost)
8775 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8778 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8779 rtx *second_test, rtx *bypass_test)
8781 enum machine_mode fpcmp_mode, intcmp_mode;
8783 int cost = ix86_fp_comparison_cost (code);
8784 enum rtx_code bypass_code, first_code, second_code;
8786 fpcmp_mode = ix86_fp_compare_mode (code);
8787 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8790 *second_test = NULL_RTX;
8792 *bypass_test = NULL_RTX;
8794 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8796 /* Do fcomi/sahf based test when profitable. */
8797 if ((bypass_code == NIL || bypass_test)
8798 && (second_code == NIL || second_test)
8799 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8803 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8804 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8810 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8811 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8813 scratch = gen_reg_rtx (HImode);
8814 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8815 emit_insn (gen_x86_sahf_1 (scratch));
8818 /* The FP codes work out to act like unsigned. */
8819 intcmp_mode = fpcmp_mode;
8821 if (bypass_code != NIL)
8822 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8823 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8825 if (second_code != NIL)
8826 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8827 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8832 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8833 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8834 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8836 scratch = gen_reg_rtx (HImode);
8837 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8839 /* In the unordered case, we have to check C2 for NaN's, which
8840 doesn't happen to work out to anything nice combination-wise.
8841 So do some bit twiddling on the value we've got in AH to come
8842 up with an appropriate set of condition codes. */
8844 intcmp_mode = CCNOmode;
8849 if (code == GT || !TARGET_IEEE_FP)
8851 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8856 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8857 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8858 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8859 intcmp_mode = CCmode;
8865 if (code == LT && TARGET_IEEE_FP)
8867 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8868 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8869 intcmp_mode = CCmode;
8874 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8880 if (code == GE || !TARGET_IEEE_FP)
8882 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8887 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8888 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8895 if (code == LE && TARGET_IEEE_FP)
8897 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8898 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8899 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8900 intcmp_mode = CCmode;
8905 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8911 if (code == EQ && TARGET_IEEE_FP)
8913 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8914 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8915 intcmp_mode = CCmode;
8920 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8927 if (code == NE && TARGET_IEEE_FP)
8929 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8930 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8936 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8942 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8946 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8955 /* Return the test that should be put into the flags user, i.e.
8956 the bcc, scc, or cmov instruction. */
8957 return gen_rtx_fmt_ee (code, VOIDmode,
8958 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8963 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8966 op0 = ix86_compare_op0;
8967 op1 = ix86_compare_op1;
8970 *second_test = NULL_RTX;
8972 *bypass_test = NULL_RTX;
8974 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8975 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8976 second_test, bypass_test);
8978 ret = ix86_expand_int_compare (code, op0, op1);
8983 /* Return true if the CODE will result in nontrivial jump sequence. */
8985 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8987 enum rtx_code bypass_code, first_code, second_code;
8990 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8991 return bypass_code != NIL || second_code != NIL;
8995 ix86_expand_branch (enum rtx_code code, rtx label)
8999 switch (GET_MODE (ix86_compare_op0))
9005 tmp = ix86_expand_compare (code, NULL, NULL);
9006 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9007 gen_rtx_LABEL_REF (VOIDmode, label),
9009 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9019 enum rtx_code bypass_code, first_code, second_code;
9021 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9024 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9026 /* Check whether we will use the natural sequence with one jump. If
9027 so, we can expand jump early. Otherwise delay expansion by
9028 creating compound insn to not confuse optimizers. */
9029 if (bypass_code == NIL && second_code == NIL
9032 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9033 gen_rtx_LABEL_REF (VOIDmode, label),
9038 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9039 ix86_compare_op0, ix86_compare_op1);
9040 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9041 gen_rtx_LABEL_REF (VOIDmode, label),
9043 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9045 use_fcomi = ix86_use_fcomi_compare (code);
9046 vec = rtvec_alloc (3 + !use_fcomi);
9047 RTVEC_ELT (vec, 0) = tmp;
9049 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9051 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9054 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9056 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9064 /* Expand DImode branch into multiple compare+branch. */
9066 rtx lo[2], hi[2], label2;
9067 enum rtx_code code1, code2, code3;
9069 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9071 tmp = ix86_compare_op0;
9072 ix86_compare_op0 = ix86_compare_op1;
9073 ix86_compare_op1 = tmp;
9074 code = swap_condition (code);
9076 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9077 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9079 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9080 avoid two branches. This costs one extra insn, so disable when
9081 optimizing for size. */
9083 if ((code == EQ || code == NE)
9085 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9090 if (hi[1] != const0_rtx)
9091 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9092 NULL_RTX, 0, OPTAB_WIDEN);
9095 if (lo[1] != const0_rtx)
9096 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9097 NULL_RTX, 0, OPTAB_WIDEN);
9099 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9100 NULL_RTX, 0, OPTAB_WIDEN);
9102 ix86_compare_op0 = tmp;
9103 ix86_compare_op1 = const0_rtx;
9104 ix86_expand_branch (code, label);
9108 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9109 op1 is a constant and the low word is zero, then we can just
9110 examine the high word. */
9112 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9115 case LT: case LTU: case GE: case GEU:
9116 ix86_compare_op0 = hi[0];
9117 ix86_compare_op1 = hi[1];
9118 ix86_expand_branch (code, label);
9124 /* Otherwise, we need two or three jumps. */
9126 label2 = gen_label_rtx ();
9129 code2 = swap_condition (code);
9130 code3 = unsigned_condition (code);
9134 case LT: case GT: case LTU: case GTU:
9137 case LE: code1 = LT; code2 = GT; break;
9138 case GE: code1 = GT; code2 = LT; break;
9139 case LEU: code1 = LTU; code2 = GTU; break;
9140 case GEU: code1 = GTU; code2 = LTU; break;
9142 case EQ: code1 = NIL; code2 = NE; break;
9143 case NE: code2 = NIL; break;
9151 * if (hi(a) < hi(b)) goto true;
9152 * if (hi(a) > hi(b)) goto false;
9153 * if (lo(a) < lo(b)) goto true;
9157 ix86_compare_op0 = hi[0];
9158 ix86_compare_op1 = hi[1];
9161 ix86_expand_branch (code1, label);
9163 ix86_expand_branch (code2, label2);
9165 ix86_compare_op0 = lo[0];
9166 ix86_compare_op1 = lo[1];
9167 ix86_expand_branch (code3, label);
9170 emit_label (label2);
9179 /* Split branch based on floating point condition. */
9181 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9182 rtx target1, rtx target2, rtx tmp)
9185 rtx label = NULL_RTX;
9187 int bypass_probability = -1, second_probability = -1, probability = -1;
9190 if (target2 != pc_rtx)
9193 code = reverse_condition_maybe_unordered (code);
9198 condition = ix86_expand_fp_compare (code, op1, op2,
9199 tmp, &second, &bypass);
9201 if (split_branch_probability >= 0)
9203 /* Distribute the probabilities across the jumps.
9204 Assume the BYPASS and SECOND to be always test
9206 probability = split_branch_probability;
9208 /* Value of 1 is low enough to make no need for probability
9209 to be updated. Later we may run some experiments and see
9210 if unordered values are more frequent in practice. */
9212 bypass_probability = 1;
9214 second_probability = 1;
9216 if (bypass != NULL_RTX)
9218 label = gen_label_rtx ();
9219 i = emit_jump_insn (gen_rtx_SET
9221 gen_rtx_IF_THEN_ELSE (VOIDmode,
9223 gen_rtx_LABEL_REF (VOIDmode,
9226 if (bypass_probability >= 0)
9228 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9229 GEN_INT (bypass_probability),
9232 i = emit_jump_insn (gen_rtx_SET
9234 gen_rtx_IF_THEN_ELSE (VOIDmode,
9235 condition, target1, target2)));
9236 if (probability >= 0)
9238 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9239 GEN_INT (probability),
9241 if (second != NULL_RTX)
9243 i = emit_jump_insn (gen_rtx_SET
9245 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9247 if (second_probability >= 0)
9249 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9250 GEN_INT (second_probability),
9253 if (label != NULL_RTX)
9258 ix86_expand_setcc (enum rtx_code code, rtx dest)
9260 rtx ret, tmp, tmpreg;
9261 rtx second_test, bypass_test;
9263 if (GET_MODE (ix86_compare_op0) == DImode
9265 return 0; /* FAIL */
9267 if (GET_MODE (dest) != QImode)
9270 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9271 PUT_MODE (ret, QImode);
9276 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9277 if (bypass_test || second_test)
9279 rtx test = second_test;
9281 rtx tmp2 = gen_reg_rtx (QImode);
9288 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9290 PUT_MODE (test, QImode);
9291 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9294 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9296 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9299 return 1; /* DONE */
9302 /* Expand comparison setting or clearing carry flag. Return true when successful
9303 and set pop for the operation. */
9305 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9307 enum machine_mode mode =
9308 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9310 /* Do not handle DImode compares that go trought special path. Also we can't
9311 deal with FP compares yet. This is possible to add. */
9312 if ((mode == DImode && !TARGET_64BIT))
9314 if (FLOAT_MODE_P (mode))
9316 rtx second_test = NULL, bypass_test = NULL;
9317 rtx compare_op, compare_seq;
9319 /* Shortcut: following common codes never translate into carry flag compares. */
9320 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9321 || code == ORDERED || code == UNORDERED)
9324 /* These comparisons require zero flag; swap operands so they won't. */
9325 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9331 code = swap_condition (code);
9334 /* Try to expand the comparison and verify that we end up with carry flag
9335 based comparison. This is fails to be true only when we decide to expand
9336 comparison using arithmetic that is not too common scenario. */
9338 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9339 &second_test, &bypass_test);
9340 compare_seq = get_insns ();
9343 if (second_test || bypass_test)
9345 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9346 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9347 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9349 code = GET_CODE (compare_op);
9350 if (code != LTU && code != GEU)
9352 emit_insn (compare_seq);
9356 if (!INTEGRAL_MODE_P (mode))
9364 /* Convert a==0 into (unsigned)a<1. */
9367 if (op1 != const0_rtx)
9370 code = (code == EQ ? LTU : GEU);
9373 /* Convert a>b into b<a or a>=b-1. */
9376 if (GET_CODE (op1) == CONST_INT)
9378 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9379 /* Bail out on overflow. We still can swap operands but that
9380 would force loading of the constant into register. */
9381 if (op1 == const0_rtx
9382 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9384 code = (code == GTU ? GEU : LTU);
9386 else if (!nonimmediate_operand (op1, mode)
9387 || !general_operand (op0, mode))
9388 /* Swapping operands in this case would generate an
9389 unrecognizable insn. */
9396 code = (code == GTU ? LTU : GEU);
9400 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9403 if (mode == DImode || op1 != const0_rtx)
9405 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9406 code = (code == LT ? GEU : LTU);
9410 if (mode == DImode || op1 != constm1_rtx)
9412 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9413 code = (code == LE ? GEU : LTU);
9419 ix86_compare_op0 = op0;
9420 ix86_compare_op1 = op1;
9421 *pop = ix86_expand_compare (code, NULL, NULL);
9422 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9428 ix86_expand_int_movcc (rtx operands[])
9430 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9431 rtx compare_seq, compare_op;
9432 rtx second_test, bypass_test;
9433 enum machine_mode mode = GET_MODE (operands[0]);
9434 bool sign_bit_compare_p = false;;
9437 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9438 compare_seq = get_insns ();
9441 compare_code = GET_CODE (compare_op);
9443 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9444 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9445 sign_bit_compare_p = true;
9447 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9448 HImode insns, we'd be swallowed in word prefix ops. */
9450 if ((mode != HImode || TARGET_FAST_PREFIX)
9451 && (mode != DImode || TARGET_64BIT)
9452 && GET_CODE (operands[2]) == CONST_INT
9453 && GET_CODE (operands[3]) == CONST_INT)
9455 rtx out = operands[0];
9456 HOST_WIDE_INT ct = INTVAL (operands[2]);
9457 HOST_WIDE_INT cf = INTVAL (operands[3]);
9461 /* Sign bit compares are better done using shifts than we do by using
9463 if (sign_bit_compare_p
9464 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9465 ix86_compare_op1, &compare_op))
9467 /* Detect overlap between destination and compare sources. */
9470 if (!sign_bit_compare_p)
9474 compare_code = GET_CODE (compare_op);
9476 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9477 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9480 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9483 /* To simplify rest of code, restrict to the GEU case. */
9484 if (compare_code == LTU)
9486 HOST_WIDE_INT tmp = ct;
9489 compare_code = reverse_condition (compare_code);
9490 code = reverse_condition (code);
9495 PUT_CODE (compare_op,
9496 reverse_condition_maybe_unordered
9497 (GET_CODE (compare_op)));
9499 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9503 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9504 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9505 tmp = gen_reg_rtx (mode);
9508 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9510 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9514 if (code == GT || code == GE)
9515 code = reverse_condition (code);
9518 HOST_WIDE_INT tmp = ct;
9523 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9524 ix86_compare_op1, VOIDmode, 0, -1);
9537 tmp = expand_simple_binop (mode, PLUS,
9539 copy_rtx (tmp), 1, OPTAB_DIRECT);
9550 tmp = expand_simple_binop (mode, IOR,
9552 copy_rtx (tmp), 1, OPTAB_DIRECT);
9554 else if (diff == -1 && ct)
9564 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9566 tmp = expand_simple_binop (mode, PLUS,
9567 copy_rtx (tmp), GEN_INT (cf),
9568 copy_rtx (tmp), 1, OPTAB_DIRECT);
9576 * andl cf - ct, dest
9586 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9589 tmp = expand_simple_binop (mode, AND,
9591 gen_int_mode (cf - ct, mode),
9592 copy_rtx (tmp), 1, OPTAB_DIRECT);
9594 tmp = expand_simple_binop (mode, PLUS,
9595 copy_rtx (tmp), GEN_INT (ct),
9596 copy_rtx (tmp), 1, OPTAB_DIRECT);
9599 if (!rtx_equal_p (tmp, out))
9600 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9602 return 1; /* DONE */
9608 tmp = ct, ct = cf, cf = tmp;
9610 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9612 /* We may be reversing unordered compare to normal compare, that
9613 is not valid in general (we may convert non-trapping condition
9614 to trapping one), however on i386 we currently emit all
9615 comparisons unordered. */
9616 compare_code = reverse_condition_maybe_unordered (compare_code);
9617 code = reverse_condition_maybe_unordered (code);
9621 compare_code = reverse_condition (compare_code);
9622 code = reverse_condition (code);
9627 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9628 && GET_CODE (ix86_compare_op1) == CONST_INT)
9630 if (ix86_compare_op1 == const0_rtx
9631 && (code == LT || code == GE))
9632 compare_code = code;
9633 else if (ix86_compare_op1 == constm1_rtx)
9637 else if (code == GT)
9642 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9643 if (compare_code != NIL
9644 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9645 && (cf == -1 || ct == -1))
9647 /* If lea code below could be used, only optimize
9648 if it results in a 2 insn sequence. */
9650 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9651 || diff == 3 || diff == 5 || diff == 9)
9652 || (compare_code == LT && ct == -1)
9653 || (compare_code == GE && cf == -1))
9656 * notl op1 (if necessary)
9664 code = reverse_condition (code);
9667 out = emit_store_flag (out, code, ix86_compare_op0,
9668 ix86_compare_op1, VOIDmode, 0, -1);
9670 out = expand_simple_binop (mode, IOR,
9672 out, 1, OPTAB_DIRECT);
9673 if (out != operands[0])
9674 emit_move_insn (operands[0], out);
9676 return 1; /* DONE */
9681 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9682 || diff == 3 || diff == 5 || diff == 9)
9683 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9684 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9690 * lea cf(dest*(ct-cf)),dest
9694 * This also catches the degenerate setcc-only case.
9700 out = emit_store_flag (out, code, ix86_compare_op0,
9701 ix86_compare_op1, VOIDmode, 0, 1);
9704 /* On x86_64 the lea instruction operates on Pmode, so we need
9705 to get arithmetics done in proper mode to match. */
9707 tmp = copy_rtx (out);
9711 out1 = copy_rtx (out);
9712 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9716 tmp = gen_rtx_PLUS (mode, tmp, out1);
9722 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9725 if (!rtx_equal_p (tmp, out))
9728 out = force_operand (tmp, copy_rtx (out));
9730 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9732 if (!rtx_equal_p (out, operands[0]))
9733 emit_move_insn (operands[0], copy_rtx (out));
9735 return 1; /* DONE */
9739 * General case: Jumpful:
9740 * xorl dest,dest cmpl op1, op2
9741 * cmpl op1, op2 movl ct, dest
9743 * decl dest movl cf, dest
9744 * andl (cf-ct),dest 1:
9749 * This is reasonably steep, but branch mispredict costs are
9750 * high on modern cpus, so consider failing only if optimizing
9754 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9755 && BRANCH_COST >= 2)
9761 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9762 /* We may be reversing unordered compare to normal compare,
9763 that is not valid in general (we may convert non-trapping
9764 condition to trapping one), however on i386 we currently
9765 emit all comparisons unordered. */
9766 code = reverse_condition_maybe_unordered (code);
9769 code = reverse_condition (code);
9770 if (compare_code != NIL)
9771 compare_code = reverse_condition (compare_code);
9775 if (compare_code != NIL)
9777 /* notl op1 (if needed)
9782 For x < 0 (resp. x <= -1) there will be no notl,
9783 so if possible swap the constants to get rid of the
9785 True/false will be -1/0 while code below (store flag
9786 followed by decrement) is 0/-1, so the constants need
9787 to be exchanged once more. */
9789 if (compare_code == GE || !cf)
9791 code = reverse_condition (code);
9796 HOST_WIDE_INT tmp = cf;
9801 out = emit_store_flag (out, code, ix86_compare_op0,
9802 ix86_compare_op1, VOIDmode, 0, -1);
9806 out = emit_store_flag (out, code, ix86_compare_op0,
9807 ix86_compare_op1, VOIDmode, 0, 1);
9809 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9810 copy_rtx (out), 1, OPTAB_DIRECT);
9813 out = expand_simple_binop (mode, AND, copy_rtx (out),
9814 gen_int_mode (cf - ct, mode),
9815 copy_rtx (out), 1, OPTAB_DIRECT);
9817 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9818 copy_rtx (out), 1, OPTAB_DIRECT);
9819 if (!rtx_equal_p (out, operands[0]))
9820 emit_move_insn (operands[0], copy_rtx (out));
9822 return 1; /* DONE */
9826 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9828 /* Try a few things more with specific constants and a variable. */
9831 rtx var, orig_out, out, tmp;
9833 if (BRANCH_COST <= 2)
9834 return 0; /* FAIL */
9836 /* If one of the two operands is an interesting constant, load a
9837 constant with the above and mask it in with a logical operation. */
9839 if (GET_CODE (operands[2]) == CONST_INT)
9842 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9843 operands[3] = constm1_rtx, op = and_optab;
9844 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9845 operands[3] = const0_rtx, op = ior_optab;
9847 return 0; /* FAIL */
9849 else if (GET_CODE (operands[3]) == CONST_INT)
9852 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9853 operands[2] = constm1_rtx, op = and_optab;
9854 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9855 operands[2] = const0_rtx, op = ior_optab;
9857 return 0; /* FAIL */
9860 return 0; /* FAIL */
9862 orig_out = operands[0];
9863 tmp = gen_reg_rtx (mode);
9866 /* Recurse to get the constant loaded. */
9867 if (ix86_expand_int_movcc (operands) == 0)
9868 return 0; /* FAIL */
9870 /* Mask in the interesting variable. */
9871 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9873 if (!rtx_equal_p (out, orig_out))
9874 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9876 return 1; /* DONE */
9880 * For comparison with above,
9890 if (! nonimmediate_operand (operands[2], mode))
9891 operands[2] = force_reg (mode, operands[2]);
9892 if (! nonimmediate_operand (operands[3], mode))
9893 operands[3] = force_reg (mode, operands[3]);
9895 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9897 rtx tmp = gen_reg_rtx (mode);
9898 emit_move_insn (tmp, operands[3]);
9901 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9903 rtx tmp = gen_reg_rtx (mode);
9904 emit_move_insn (tmp, operands[2]);
9908 if (! register_operand (operands[2], VOIDmode)
9910 || ! register_operand (operands[3], VOIDmode)))
9911 operands[2] = force_reg (mode, operands[2]);
9914 && ! register_operand (operands[3], VOIDmode))
9915 operands[3] = force_reg (mode, operands[3]);
9917 emit_insn (compare_seq);
9918 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9919 gen_rtx_IF_THEN_ELSE (mode,
9920 compare_op, operands[2],
9923 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9924 gen_rtx_IF_THEN_ELSE (mode,
9926 copy_rtx (operands[3]),
9927 copy_rtx (operands[0]))));
9929 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9930 gen_rtx_IF_THEN_ELSE (mode,
9932 copy_rtx (operands[2]),
9933 copy_rtx (operands[0]))));
9935 return 1; /* DONE */
9939 ix86_expand_fp_movcc (rtx operands[])
9943 rtx compare_op, second_test, bypass_test;
9945 /* For SF/DFmode conditional moves based on comparisons
9946 in same mode, we may want to use SSE min/max instructions. */
9947 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9948 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9949 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9950 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9952 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9953 /* We may be called from the post-reload splitter. */
9954 && (!REG_P (operands[0])
9955 || SSE_REG_P (operands[0])
9956 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9958 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9959 code = GET_CODE (operands[1]);
9961 /* See if we have (cross) match between comparison operands and
9962 conditional move operands. */
9963 if (rtx_equal_p (operands[2], op1))
9968 code = reverse_condition_maybe_unordered (code);
9970 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9972 /* Check for min operation. */
9973 if (code == LT || code == UNLE)
9981 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9982 if (memory_operand (op0, VOIDmode))
9983 op0 = force_reg (GET_MODE (operands[0]), op0);
9984 if (GET_MODE (operands[0]) == SFmode)
9985 emit_insn (gen_minsf3 (operands[0], op0, op1));
9987 emit_insn (gen_mindf3 (operands[0], op0, op1));
9990 /* Check for max operation. */
9991 if (code == GT || code == UNGE)
9999 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10000 if (memory_operand (op0, VOIDmode))
10001 op0 = force_reg (GET_MODE (operands[0]), op0);
10002 if (GET_MODE (operands[0]) == SFmode)
10003 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10005 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10009 /* Manage condition to be sse_comparison_operator. In case we are
10010 in non-ieee mode, try to canonicalize the destination operand
10011 to be first in the comparison - this helps reload to avoid extra
10013 if (!sse_comparison_operator (operands[1], VOIDmode)
10014 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10016 rtx tmp = ix86_compare_op0;
10017 ix86_compare_op0 = ix86_compare_op1;
10018 ix86_compare_op1 = tmp;
10019 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10020 VOIDmode, ix86_compare_op0,
10023 /* Similarly try to manage result to be first operand of conditional
10024 move. We also don't support the NE comparison on SSE, so try to
10026 if ((rtx_equal_p (operands[0], operands[3])
10027 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10028 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10030 rtx tmp = operands[2];
10031 operands[2] = operands[3];
10033 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10034 (GET_CODE (operands[1])),
10035 VOIDmode, ix86_compare_op0,
10038 if (GET_MODE (operands[0]) == SFmode)
10039 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10040 operands[2], operands[3],
10041 ix86_compare_op0, ix86_compare_op1));
10043 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10044 operands[2], operands[3],
10045 ix86_compare_op0, ix86_compare_op1));
10049 /* The floating point conditional move instructions don't directly
10050 support conditions resulting from a signed integer comparison. */
10052 code = GET_CODE (operands[1]);
10053 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10055 /* The floating point conditional move instructions don't directly
10056 support signed integer comparisons. */
10058 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10060 if (second_test != NULL || bypass_test != NULL)
10062 tmp = gen_reg_rtx (QImode);
10063 ix86_expand_setcc (code, tmp);
10065 ix86_compare_op0 = tmp;
10066 ix86_compare_op1 = const0_rtx;
10067 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10069 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10071 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10072 emit_move_insn (tmp, operands[3]);
10075 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10077 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10078 emit_move_insn (tmp, operands[2]);
10082 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10083 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10088 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10089 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10094 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10095 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10103 /* Expand conditional increment or decrement using adb/sbb instructions.
10104 The default case using setcc followed by the conditional move can be
10105 done by generic code. */
10107 ix86_expand_int_addcc (rtx operands[])
10109 enum rtx_code code = GET_CODE (operands[1]);
10111 rtx val = const0_rtx;
10112 bool fpcmp = false;
10113 enum machine_mode mode = GET_MODE (operands[0]);
10115 if (operands[3] != const1_rtx
10116 && operands[3] != constm1_rtx)
10118 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10119 ix86_compare_op1, &compare_op))
10121 code = GET_CODE (compare_op);
10123 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10124 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10127 code = ix86_fp_compare_code_to_integer (code);
10134 PUT_CODE (compare_op,
10135 reverse_condition_maybe_unordered
10136 (GET_CODE (compare_op)));
10138 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10140 PUT_MODE (compare_op, mode);
10142 /* Construct either adc or sbb insn. */
10143 if ((code == LTU) == (operands[3] == constm1_rtx))
10145 switch (GET_MODE (operands[0]))
10148 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10151 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10154 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10157 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10165 switch (GET_MODE (operands[0]))
10168 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10171 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10174 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10177 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10183 return 1; /* DONE */
10187 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10188 works for floating pointer parameters and nonoffsetable memories.
10189 For pushes, it returns just stack offsets; the values will be saved
10190 in the right order. Maximally three parts are generated. */
10193 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10198 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10200 size = (GET_MODE_SIZE (mode) + 4) / 8;
10202 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10204 if (size < 2 || size > 3)
10207 /* Optimize constant pool reference to immediates. This is used by fp
10208 moves, that force all constants to memory to allow combining. */
10209 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10211 rtx tmp = maybe_get_pool_constant (operand);
10216 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10218 /* The only non-offsetable memories we handle are pushes. */
10219 if (! push_operand (operand, VOIDmode))
10222 operand = copy_rtx (operand);
10223 PUT_MODE (operand, Pmode);
10224 parts[0] = parts[1] = parts[2] = operand;
10226 else if (!TARGET_64BIT)
10228 if (mode == DImode)
10229 split_di (&operand, 1, &parts[0], &parts[1]);
10232 if (REG_P (operand))
10234 if (!reload_completed)
10236 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10237 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10239 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10241 else if (offsettable_memref_p (operand))
10243 operand = adjust_address (operand, SImode, 0);
10244 parts[0] = operand;
10245 parts[1] = adjust_address (operand, SImode, 4);
10247 parts[2] = adjust_address (operand, SImode, 8);
10249 else if (GET_CODE (operand) == CONST_DOUBLE)
10254 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10259 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10260 parts[2] = gen_int_mode (l[2], SImode);
10263 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10268 parts[1] = gen_int_mode (l[1], SImode);
10269 parts[0] = gen_int_mode (l[0], SImode);
10277 if (mode == TImode)
10278 split_ti (&operand, 1, &parts[0], &parts[1]);
10279 if (mode == XFmode || mode == TFmode)
10281 if (REG_P (operand))
10283 if (!reload_completed)
10285 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10286 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10288 else if (offsettable_memref_p (operand))
10290 operand = adjust_address (operand, DImode, 0);
10291 parts[0] = operand;
10292 parts[1] = adjust_address (operand, SImode, 8);
10294 else if (GET_CODE (operand) == CONST_DOUBLE)
10299 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10300 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10301 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10302 if (HOST_BITS_PER_WIDE_INT >= 64)
10305 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10306 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10309 parts[0] = immed_double_const (l[0], l[1], DImode);
10310 parts[1] = gen_int_mode (l[2], SImode);
10320 /* Emit insns to perform a move or push of DI, DF, and XF values.
10321 Return false when normal moves are needed; true when all required
10322 insns have been emitted. Operands 2-4 contain the input values
10323 int the correct order; operands 5-7 contain the output values. */
10326 ix86_split_long_move (rtx operands[])
10331 int collisions = 0;
10332 enum machine_mode mode = GET_MODE (operands[0]);
10334 /* The DFmode expanders may ask us to move double.
10335 For 64bit target this is single move. By hiding the fact
10336 here we simplify i386.md splitters. */
10337 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10339 /* Optimize constant pool reference to immediates. This is used by
10340 fp moves, that force all constants to memory to allow combining. */
10342 if (GET_CODE (operands[1]) == MEM
10343 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10344 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10345 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10346 if (push_operand (operands[0], VOIDmode))
10348 operands[0] = copy_rtx (operands[0]);
10349 PUT_MODE (operands[0], Pmode);
10352 operands[0] = gen_lowpart (DImode, operands[0]);
10353 operands[1] = gen_lowpart (DImode, operands[1]);
10354 emit_move_insn (operands[0], operands[1]);
10358 /* The only non-offsettable memory we handle is push. */
10359 if (push_operand (operands[0], VOIDmode))
10361 else if (GET_CODE (operands[0]) == MEM
10362 && ! offsettable_memref_p (operands[0]))
10365 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10366 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10368 /* When emitting push, take care for source operands on the stack. */
10369 if (push && GET_CODE (operands[1]) == MEM
10370 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10373 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10374 XEXP (part[1][2], 0));
10375 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10376 XEXP (part[1][1], 0));
10379 /* We need to do copy in the right order in case an address register
10380 of the source overlaps the destination. */
10381 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10383 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10385 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10388 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10391 /* Collision in the middle part can be handled by reordering. */
10392 if (collisions == 1 && nparts == 3
10393 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10396 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10397 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10400 /* If there are more collisions, we can't handle it by reordering.
10401 Do an lea to the last part and use only one colliding move. */
10402 else if (collisions > 1)
10408 base = part[0][nparts - 1];
10410 /* Handle the case when the last part isn't valid for lea.
10411 Happens in 64-bit mode storing the 12-byte XFmode. */
10412 if (GET_MODE (base) != Pmode)
10413 base = gen_rtx_REG (Pmode, REGNO (base));
10415 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10416 part[1][0] = replace_equiv_address (part[1][0], base);
10417 part[1][1] = replace_equiv_address (part[1][1],
10418 plus_constant (base, UNITS_PER_WORD));
10420 part[1][2] = replace_equiv_address (part[1][2],
10421 plus_constant (base, 8));
10431 /* We use only first 12 bytes of TFmode value, but for pushing we
10432 are required to adjust stack as if we were pushing real 16byte
10434 if (mode == TFmode && !TARGET_64BIT)
10435 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10437 emit_move_insn (part[0][2], part[1][2]);
10442 /* In 64bit mode we don't have 32bit push available. In case this is
10443 register, it is OK - we will just use larger counterpart. We also
10444 retype memory - these comes from attempt to avoid REX prefix on
10445 moving of second half of TFmode value. */
10446 if (GET_MODE (part[1][1]) == SImode)
10448 if (GET_CODE (part[1][1]) == MEM)
10449 part[1][1] = adjust_address (part[1][1], DImode, 0);
10450 else if (REG_P (part[1][1]))
10451 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10454 if (GET_MODE (part[1][0]) == SImode)
10455 part[1][0] = part[1][1];
10458 emit_move_insn (part[0][1], part[1][1]);
10459 emit_move_insn (part[0][0], part[1][0]);
10463 /* Choose correct order to not overwrite the source before it is copied. */
10464 if ((REG_P (part[0][0])
10465 && REG_P (part[1][1])
10466 && (REGNO (part[0][0]) == REGNO (part[1][1])
10468 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10470 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10474 operands[2] = part[0][2];
10475 operands[3] = part[0][1];
10476 operands[4] = part[0][0];
10477 operands[5] = part[1][2];
10478 operands[6] = part[1][1];
10479 operands[7] = part[1][0];
10483 operands[2] = part[0][1];
10484 operands[3] = part[0][0];
10485 operands[5] = part[1][1];
10486 operands[6] = part[1][0];
10493 operands[2] = part[0][0];
10494 operands[3] = part[0][1];
10495 operands[4] = part[0][2];
10496 operands[5] = part[1][0];
10497 operands[6] = part[1][1];
10498 operands[7] = part[1][2];
10502 operands[2] = part[0][0];
10503 operands[3] = part[0][1];
10504 operands[5] = part[1][0];
10505 operands[6] = part[1][1];
10508 emit_move_insn (operands[2], operands[5]);
10509 emit_move_insn (operands[3], operands[6]);
10511 emit_move_insn (operands[4], operands[7]);
10517 ix86_split_ashldi (rtx *operands, rtx scratch)
10519 rtx low[2], high[2];
10522 if (GET_CODE (operands[2]) == CONST_INT)
10524 split_di (operands, 2, low, high);
10525 count = INTVAL (operands[2]) & 63;
10529 emit_move_insn (high[0], low[1]);
10530 emit_move_insn (low[0], const0_rtx);
10533 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10537 if (!rtx_equal_p (operands[0], operands[1]))
10538 emit_move_insn (operands[0], operands[1]);
10539 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10540 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10545 if (!rtx_equal_p (operands[0], operands[1]))
10546 emit_move_insn (operands[0], operands[1]);
10548 split_di (operands, 1, low, high);
10550 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10551 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10553 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10555 if (! no_new_pseudos)
10556 scratch = force_reg (SImode, const0_rtx);
10558 emit_move_insn (scratch, const0_rtx);
10560 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10564 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10569 ix86_split_ashrdi (rtx *operands, rtx scratch)
10571 rtx low[2], high[2];
10574 if (GET_CODE (operands[2]) == CONST_INT)
10576 split_di (operands, 2, low, high);
10577 count = INTVAL (operands[2]) & 63;
10581 emit_move_insn (low[0], high[1]);
10583 if (! reload_completed)
10584 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10587 emit_move_insn (high[0], low[0]);
10588 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10592 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10596 if (!rtx_equal_p (operands[0], operands[1]))
10597 emit_move_insn (operands[0], operands[1]);
10598 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10599 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10604 if (!rtx_equal_p (operands[0], operands[1]))
10605 emit_move_insn (operands[0], operands[1]);
10607 split_di (operands, 1, low, high);
10609 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10610 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10612 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10614 if (! no_new_pseudos)
10615 scratch = gen_reg_rtx (SImode);
10616 emit_move_insn (scratch, high[0]);
10617 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10618 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10622 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10627 ix86_split_lshrdi (rtx *operands, rtx scratch)
10629 rtx low[2], high[2];
10632 if (GET_CODE (operands[2]) == CONST_INT)
10634 split_di (operands, 2, low, high);
10635 count = INTVAL (operands[2]) & 63;
10639 emit_move_insn (low[0], high[1]);
10640 emit_move_insn (high[0], const0_rtx);
10643 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10647 if (!rtx_equal_p (operands[0], operands[1]))
10648 emit_move_insn (operands[0], operands[1]);
10649 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10650 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10655 if (!rtx_equal_p (operands[0], operands[1]))
10656 emit_move_insn (operands[0], operands[1]);
10658 split_di (operands, 1, low, high);
10660 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10661 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10663 /* Heh. By reversing the arguments, we can reuse this pattern. */
10664 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10666 if (! no_new_pseudos)
10667 scratch = force_reg (SImode, const0_rtx);
10669 emit_move_insn (scratch, const0_rtx);
10671 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10675 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10679 /* Helper function for the string operations below. Dest VARIABLE whether
10680 it is aligned to VALUE bytes. If true, jump to the label. */
10682 ix86_expand_aligntest (rtx variable, int value)
10684 rtx label = gen_label_rtx ();
10685 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10686 if (GET_MODE (variable) == DImode)
10687 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10689 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10690 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10695 /* Adjust COUNTER by the VALUE. */
10697 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10699 if (GET_MODE (countreg) == DImode)
10700 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10702 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10705 /* Zero extend possibly SImode EXP to Pmode register. */
10707 ix86_zero_extend_to_Pmode (rtx exp)
10710 if (GET_MODE (exp) == VOIDmode)
10711 return force_reg (Pmode, exp);
10712 if (GET_MODE (exp) == Pmode)
10713 return copy_to_mode_reg (Pmode, exp);
10714 r = gen_reg_rtx (Pmode);
10715 emit_insn (gen_zero_extendsidi2 (r, exp));
10719 /* Expand string move (memcpy) operation. Use i386 string operations when
10720 profitable. expand_clrstr contains similar code. */
10722 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10724 rtx srcreg, destreg, countreg;
10725 enum machine_mode counter_mode;
10726 HOST_WIDE_INT align = 0;
10727 unsigned HOST_WIDE_INT count = 0;
10730 if (GET_CODE (align_exp) == CONST_INT)
10731 align = INTVAL (align_exp);
10733 /* Can't use any of this if the user has appropriated esi or edi. */
10734 if (global_regs[4] || global_regs[5])
10737 /* This simple hack avoids all inlining code and simplifies code below. */
10738 if (!TARGET_ALIGN_STRINGOPS)
10741 if (GET_CODE (count_exp) == CONST_INT)
10743 count = INTVAL (count_exp);
10744 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10748 /* Figure out proper mode for counter. For 32bits it is always SImode,
10749 for 64bits use SImode when possible, otherwise DImode.
10750 Set count to number of bytes copied when known at compile time. */
10751 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10752 || x86_64_zero_extended_value (count_exp))
10753 counter_mode = SImode;
10755 counter_mode = DImode;
10759 if (counter_mode != SImode && counter_mode != DImode)
10762 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10763 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10765 emit_insn (gen_cld ());
10767 /* When optimizing for size emit simple rep ; movsb instruction for
10768 counts not divisible by 4. */
10770 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10772 countreg = ix86_zero_extend_to_Pmode (count_exp);
10774 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10775 destreg, srcreg, countreg));
10777 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10778 destreg, srcreg, countreg));
10781 /* For constant aligned (or small unaligned) copies use rep movsl
10782 followed by code copying the rest. For PentiumPro ensure 8 byte
10783 alignment to allow rep movsl acceleration. */
10785 else if (count != 0
10787 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10788 || optimize_size || count < (unsigned int) 64))
10790 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10791 if (count & ~(size - 1))
10793 countreg = copy_to_mode_reg (counter_mode,
10794 GEN_INT ((count >> (size == 4 ? 2 : 3))
10795 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10796 countreg = ix86_zero_extend_to_Pmode (countreg);
10800 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10801 destreg, srcreg, countreg));
10803 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10804 destreg, srcreg, countreg));
10807 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10808 destreg, srcreg, countreg));
10810 if (size == 8 && (count & 0x04))
10811 emit_insn (gen_strmovsi (destreg, srcreg));
10813 emit_insn (gen_strmovhi (destreg, srcreg));
10815 emit_insn (gen_strmovqi (destreg, srcreg));
10817 /* The generic code based on the glibc implementation:
10818 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10819 allowing accelerated copying there)
10820 - copy the data using rep movsl
10821 - copy the rest. */
10826 int desired_alignment = (TARGET_PENTIUMPRO
10827 && (count == 0 || count >= (unsigned int) 260)
10828 ? 8 : UNITS_PER_WORD);
10830 /* In case we don't know anything about the alignment, default to
10831 library version, since it is usually equally fast and result in
10834 Also emit call when we know that the count is large and call overhead
10835 will not be important. */
10836 if (!TARGET_INLINE_ALL_STRINGOPS
10837 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10843 if (TARGET_SINGLE_STRINGOP)
10844 emit_insn (gen_cld ());
10846 countreg2 = gen_reg_rtx (Pmode);
10847 countreg = copy_to_mode_reg (counter_mode, count_exp);
10849 /* We don't use loops to align destination and to copy parts smaller
10850 than 4 bytes, because gcc is able to optimize such code better (in
10851 the case the destination or the count really is aligned, gcc is often
10852 able to predict the branches) and also it is friendlier to the
10853 hardware branch prediction.
10855 Using loops is beneficial for generic case, because we can
10856 handle small counts using the loops. Many CPUs (such as Athlon)
10857 have large REP prefix setup costs.
10859 This is quite costly. Maybe we can revisit this decision later or
10860 add some customizability to this code. */
10862 if (count == 0 && align < desired_alignment)
10864 label = gen_label_rtx ();
10865 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10866 LEU, 0, counter_mode, 1, label);
10870 rtx label = ix86_expand_aligntest (destreg, 1);
10871 emit_insn (gen_strmovqi (destreg, srcreg));
10872 ix86_adjust_counter (countreg, 1);
10873 emit_label (label);
10874 LABEL_NUSES (label) = 1;
10878 rtx label = ix86_expand_aligntest (destreg, 2);
10879 emit_insn (gen_strmovhi (destreg, srcreg));
10880 ix86_adjust_counter (countreg, 2);
10881 emit_label (label);
10882 LABEL_NUSES (label) = 1;
10884 if (align <= 4 && desired_alignment > 4)
10886 rtx label = ix86_expand_aligntest (destreg, 4);
10887 emit_insn (gen_strmovsi (destreg, srcreg));
10888 ix86_adjust_counter (countreg, 4);
10889 emit_label (label);
10890 LABEL_NUSES (label) = 1;
10893 if (label && desired_alignment > 4 && !TARGET_64BIT)
10895 emit_label (label);
10896 LABEL_NUSES (label) = 1;
10899 if (!TARGET_SINGLE_STRINGOP)
10900 emit_insn (gen_cld ());
10903 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10905 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10906 destreg, srcreg, countreg2));
10910 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10911 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10912 destreg, srcreg, countreg2));
10917 emit_label (label);
10918 LABEL_NUSES (label) = 1;
10920 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10921 emit_insn (gen_strmovsi (destreg, srcreg));
10922 if ((align <= 4 || count == 0) && TARGET_64BIT)
10924 rtx label = ix86_expand_aligntest (countreg, 4);
10925 emit_insn (gen_strmovsi (destreg, srcreg));
10926 emit_label (label);
10927 LABEL_NUSES (label) = 1;
10929 if (align > 2 && count != 0 && (count & 2))
10930 emit_insn (gen_strmovhi (destreg, srcreg));
10931 if (align <= 2 || count == 0)
10933 rtx label = ix86_expand_aligntest (countreg, 2);
10934 emit_insn (gen_strmovhi (destreg, srcreg));
10935 emit_label (label);
10936 LABEL_NUSES (label) = 1;
10938 if (align > 1 && count != 0 && (count & 1))
10939 emit_insn (gen_strmovqi (destreg, srcreg));
10940 if (align <= 1 || count == 0)
10942 rtx label = ix86_expand_aligntest (countreg, 1);
10943 emit_insn (gen_strmovqi (destreg, srcreg));
10944 emit_label (label);
10945 LABEL_NUSES (label) = 1;
10949 insns = get_insns ();
10952 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10957 /* Expand string clear operation (bzero). Use i386 string operations when
10958 profitable. expand_movstr contains similar code. */
10960 ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
10962 rtx destreg, zeroreg, countreg;
10963 enum machine_mode counter_mode;
10964 HOST_WIDE_INT align = 0;
10965 unsigned HOST_WIDE_INT count = 0;
10967 if (GET_CODE (align_exp) == CONST_INT)
10968 align = INTVAL (align_exp);
10970 /* Can't use any of this if the user has appropriated esi. */
10971 if (global_regs[4])
10974 /* This simple hack avoids all inlining code and simplifies code below. */
10975 if (!TARGET_ALIGN_STRINGOPS)
10978 if (GET_CODE (count_exp) == CONST_INT)
10980 count = INTVAL (count_exp);
10981 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10984 /* Figure out proper mode for counter. For 32bits it is always SImode,
10985 for 64bits use SImode when possible, otherwise DImode.
10986 Set count to number of bytes copied when known at compile time. */
10987 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10988 || x86_64_zero_extended_value (count_exp))
10989 counter_mode = SImode;
10991 counter_mode = DImode;
10993 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10995 emit_insn (gen_cld ());
10997 /* When optimizing for size emit simple rep ; movsb instruction for
10998 counts not divisible by 4. */
11000 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11002 countreg = ix86_zero_extend_to_Pmode (count_exp);
11003 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11005 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11006 destreg, countreg));
11008 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11009 destreg, countreg));
11011 else if (count != 0
11013 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11014 || optimize_size || count < (unsigned int) 64))
11016 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11017 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11018 if (count & ~(size - 1))
11020 countreg = copy_to_mode_reg (counter_mode,
11021 GEN_INT ((count >> (size == 4 ? 2 : 3))
11022 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11023 countreg = ix86_zero_extend_to_Pmode (countreg);
11027 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11028 destreg, countreg));
11030 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11031 destreg, countreg));
11034 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11035 destreg, countreg));
11037 if (size == 8 && (count & 0x04))
11038 emit_insn (gen_strsetsi (destreg,
11039 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11041 emit_insn (gen_strsethi (destreg,
11042 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11044 emit_insn (gen_strsetqi (destreg,
11045 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11051 /* Compute desired alignment of the string operation. */
11052 int desired_alignment = (TARGET_PENTIUMPRO
11053 && (count == 0 || count >= (unsigned int) 260)
11054 ? 8 : UNITS_PER_WORD);
11056 /* In case we don't know anything about the alignment, default to
11057 library version, since it is usually equally fast and result in
11060 Also emit call when we know that the count is large and call overhead
11061 will not be important. */
11062 if (!TARGET_INLINE_ALL_STRINGOPS
11063 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11066 if (TARGET_SINGLE_STRINGOP)
11067 emit_insn (gen_cld ());
11069 countreg2 = gen_reg_rtx (Pmode);
11070 countreg = copy_to_mode_reg (counter_mode, count_exp);
11071 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11073 if (count == 0 && align < desired_alignment)
11075 label = gen_label_rtx ();
11076 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11077 LEU, 0, counter_mode, 1, label);
11081 rtx label = ix86_expand_aligntest (destreg, 1);
11082 emit_insn (gen_strsetqi (destreg,
11083 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11084 ix86_adjust_counter (countreg, 1);
11085 emit_label (label);
11086 LABEL_NUSES (label) = 1;
11090 rtx label = ix86_expand_aligntest (destreg, 2);
11091 emit_insn (gen_strsethi (destreg,
11092 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11093 ix86_adjust_counter (countreg, 2);
11094 emit_label (label);
11095 LABEL_NUSES (label) = 1;
11097 if (align <= 4 && desired_alignment > 4)
11099 rtx label = ix86_expand_aligntest (destreg, 4);
11100 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11101 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11103 ix86_adjust_counter (countreg, 4);
11104 emit_label (label);
11105 LABEL_NUSES (label) = 1;
11108 if (label && desired_alignment > 4 && !TARGET_64BIT)
11110 emit_label (label);
11111 LABEL_NUSES (label) = 1;
11115 if (!TARGET_SINGLE_STRINGOP)
11116 emit_insn (gen_cld ());
11119 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11121 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11122 destreg, countreg2));
11126 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11127 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11128 destreg, countreg2));
11132 emit_label (label);
11133 LABEL_NUSES (label) = 1;
11136 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11137 emit_insn (gen_strsetsi (destreg,
11138 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11139 if (TARGET_64BIT && (align <= 4 || count == 0))
11141 rtx label = ix86_expand_aligntest (countreg, 4);
11142 emit_insn (gen_strsetsi (destreg,
11143 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11144 emit_label (label);
11145 LABEL_NUSES (label) = 1;
11147 if (align > 2 && count != 0 && (count & 2))
11148 emit_insn (gen_strsethi (destreg,
11149 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11150 if (align <= 2 || count == 0)
11152 rtx label = ix86_expand_aligntest (countreg, 2);
11153 emit_insn (gen_strsethi (destreg,
11154 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11155 emit_label (label);
11156 LABEL_NUSES (label) = 1;
11158 if (align > 1 && count != 0 && (count & 1))
11159 emit_insn (gen_strsetqi (destreg,
11160 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11161 if (align <= 1 || count == 0)
11163 rtx label = ix86_expand_aligntest (countreg, 1);
11164 emit_insn (gen_strsetqi (destreg,
11165 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11166 emit_label (label);
11167 LABEL_NUSES (label) = 1;
11172 /* Expand strlen. */
11174 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11176 rtx addr, scratch1, scratch2, scratch3, scratch4;
11178 /* The generic case of strlen expander is long. Avoid it's
11179 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11181 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11182 && !TARGET_INLINE_ALL_STRINGOPS
11184 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11187 addr = force_reg (Pmode, XEXP (src, 0));
11188 scratch1 = gen_reg_rtx (Pmode);
11190 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11193 /* Well it seems that some optimizer does not combine a call like
11194 foo(strlen(bar), strlen(bar));
11195 when the move and the subtraction is done here. It does calculate
11196 the length just once when these instructions are done inside of
11197 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11198 often used and I use one fewer register for the lifetime of
11199 output_strlen_unroll() this is better. */
11201 emit_move_insn (out, addr);
11203 ix86_expand_strlensi_unroll_1 (out, align);
11205 /* strlensi_unroll_1 returns the address of the zero at the end of
11206 the string, like memchr(), so compute the length by subtracting
11207 the start address. */
11209 emit_insn (gen_subdi3 (out, out, addr));
11211 emit_insn (gen_subsi3 (out, out, addr));
11215 scratch2 = gen_reg_rtx (Pmode);
11216 scratch3 = gen_reg_rtx (Pmode);
11217 scratch4 = force_reg (Pmode, constm1_rtx);
11219 emit_move_insn (scratch3, addr);
11220 eoschar = force_reg (QImode, eoschar);
11222 emit_insn (gen_cld ());
11225 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11226 align, scratch4, scratch3));
11227 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11228 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11232 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11233 align, scratch4, scratch3));
11234 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11235 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11241 /* Expand the appropriate insns for doing strlen if not just doing
11244 out = result, initialized with the start address
11245 align_rtx = alignment of the address.
11246 scratch = scratch register, initialized with the startaddress when
11247 not aligned, otherwise undefined
11249 This is just the body. It needs the initializations mentioned above and
11250 some address computing at the end. These things are done in i386.md. */
11253 ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
11257 rtx align_2_label = NULL_RTX;
11258 rtx align_3_label = NULL_RTX;
11259 rtx align_4_label = gen_label_rtx ();
11260 rtx end_0_label = gen_label_rtx ();
11262 rtx tmpreg = gen_reg_rtx (SImode);
11263 rtx scratch = gen_reg_rtx (SImode);
11267 if (GET_CODE (align_rtx) == CONST_INT)
11268 align = INTVAL (align_rtx);
11270 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11272 /* Is there a known alignment and is it less than 4? */
11275 rtx scratch1 = gen_reg_rtx (Pmode);
11276 emit_move_insn (scratch1, out);
11277 /* Is there a known alignment and is it not 2? */
11280 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11281 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11283 /* Leave just the 3 lower bits. */
11284 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11285 NULL_RTX, 0, OPTAB_WIDEN);
11287 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11288 Pmode, 1, align_4_label);
11289 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11290 Pmode, 1, align_2_label);
11291 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11292 Pmode, 1, align_3_label);
11296 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11297 check if is aligned to 4 - byte. */
11299 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11300 NULL_RTX, 0, OPTAB_WIDEN);
11302 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11303 Pmode, 1, align_4_label);
11306 mem = gen_rtx_MEM (QImode, out);
11308 /* Now compare the bytes. */
11310 /* Compare the first n unaligned byte on a byte per byte basis. */
11311 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11312 QImode, 1, end_0_label);
11314 /* Increment the address. */
11316 emit_insn (gen_adddi3 (out, out, const1_rtx));
11318 emit_insn (gen_addsi3 (out, out, const1_rtx));
11320 /* Not needed with an alignment of 2 */
11323 emit_label (align_2_label);
11325 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11329 emit_insn (gen_adddi3 (out, out, const1_rtx));
11331 emit_insn (gen_addsi3 (out, out, const1_rtx));
11333 emit_label (align_3_label);
11336 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11340 emit_insn (gen_adddi3 (out, out, const1_rtx));
11342 emit_insn (gen_addsi3 (out, out, const1_rtx));
11345 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11346 align this loop. It gives only huge programs, but does not help to
11348 emit_label (align_4_label);
11350 mem = gen_rtx_MEM (SImode, out);
11351 emit_move_insn (scratch, mem);
11353 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11355 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11357 /* This formula yields a nonzero result iff one of the bytes is zero.
11358 This saves three branches inside loop and many cycles. */
11360 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11361 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11362 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11363 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11364 gen_int_mode (0x80808080, SImode)));
11365 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11370 rtx reg = gen_reg_rtx (SImode);
11371 rtx reg2 = gen_reg_rtx (Pmode);
11372 emit_move_insn (reg, tmpreg);
11373 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11375 /* If zero is not in the first two bytes, move two bytes forward. */
11376 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11377 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11378 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11379 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11380 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11383 /* Emit lea manually to avoid clobbering of flags. */
11384 emit_insn (gen_rtx_SET (SImode, reg2,
11385 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11387 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11388 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11389 emit_insn (gen_rtx_SET (VOIDmode, out,
11390 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11397 rtx end_2_label = gen_label_rtx ();
11398 /* Is zero in the first two bytes? */
11400 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11401 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11402 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11403 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11404 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11406 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11407 JUMP_LABEL (tmp) = end_2_label;
11409 /* Not in the first two. Move two bytes forward. */
11410 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11412 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11414 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11416 emit_label (end_2_label);
11420 /* Avoid branch in fixing the byte. */
11421 tmpreg = gen_lowpart (QImode, tmpreg);
11422 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11423 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11425 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11427 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11429 emit_label (end_0_label);
11433 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2,
11434 rtx pop, int sibcall)
11436 rtx use = NULL, call;
11438 if (pop == const0_rtx)
11440 if (TARGET_64BIT && pop)
11444 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11445 fnaddr = machopic_indirect_call_target (fnaddr);
11447 /* Static functions and indirect calls don't need the pic register. */
11448 if (! TARGET_64BIT && flag_pic
11449 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11450 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11451 use_reg (&use, pic_offset_table_rtx);
11453 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11455 rtx al = gen_rtx_REG (QImode, 0);
11456 emit_move_insn (al, callarg2);
11457 use_reg (&use, al);
11459 #endif /* TARGET_MACHO */
11461 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11463 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11464 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11466 if (sibcall && TARGET_64BIT
11467 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11470 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11471 fnaddr = gen_rtx_REG (Pmode, 40);
11472 emit_move_insn (fnaddr, addr);
11473 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11476 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11478 call = gen_rtx_SET (VOIDmode, retval, call);
11481 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11482 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11483 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11486 call = emit_call_insn (call);
11488 CALL_INSN_FUNCTION_USAGE (call) = use;
11492 /* Clear stack slot assignments remembered from previous functions.
11493 This is called from INIT_EXPANDERS once before RTL is emitted for each
11496 static struct machine_function *
11497 ix86_init_machine_status (void)
11499 struct machine_function *f;
11501 f = ggc_alloc_cleared (sizeof (struct machine_function));
11502 f->use_fast_prologue_epilogue_nregs = -1;
11507 /* Return a MEM corresponding to a stack slot with mode MODE.
11508 Allocate a new slot if necessary.
11510 The RTL for a function can have several slots available: N is
11511 which slot to use. */
11514 assign_386_stack_local (enum machine_mode mode, int n)
11516 struct stack_local_entry *s;
11518 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11521 for (s = ix86_stack_locals; s; s = s->next)
11522 if (s->mode == mode && s->n == n)
11525 s = (struct stack_local_entry *)
11526 ggc_alloc (sizeof (struct stack_local_entry));
11529 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11531 s->next = ix86_stack_locals;
11532 ix86_stack_locals = s;
11536 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11538 static GTY(()) rtx ix86_tls_symbol;
11540 ix86_tls_get_addr (void)
11543 if (!ix86_tls_symbol)
11545 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11546 (TARGET_GNU_TLS && !TARGET_64BIT)
11547 ? "___tls_get_addr"
11548 : "__tls_get_addr");
11551 return ix86_tls_symbol;
11554 /* Calculate the length of the memory address in the instruction
11555 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11558 memory_address_length (rtx addr)
11560 struct ix86_address parts;
11561 rtx base, index, disp;
11564 if (GET_CODE (addr) == PRE_DEC
11565 || GET_CODE (addr) == POST_INC
11566 || GET_CODE (addr) == PRE_MODIFY
11567 || GET_CODE (addr) == POST_MODIFY)
11570 if (! ix86_decompose_address (addr, &parts))
11574 index = parts.index;
11578 /* Register Indirect. */
11579 if (base && !index && !disp)
11581 /* Special cases: ebp and esp need the two-byte modrm form. */
11582 if (addr == stack_pointer_rtx
11583 || addr == arg_pointer_rtx
11584 || addr == frame_pointer_rtx
11585 || addr == hard_frame_pointer_rtx)
11589 /* Direct Addressing. */
11590 else if (disp && !base && !index)
11595 /* Find the length of the displacement constant. */
11598 if (GET_CODE (disp) == CONST_INT
11599 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11606 /* An index requires the two-byte modrm form. */
11614 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11615 is set, expect that insn have 8bit immediate alternative. */
11617 ix86_attr_length_immediate_default (rtx insn, int shortform)
11621 extract_insn_cached (insn);
11622 for (i = recog_data.n_operands - 1; i >= 0; --i)
11623 if (CONSTANT_P (recog_data.operand[i]))
11628 && GET_CODE (recog_data.operand[i]) == CONST_INT
11629 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11633 switch (get_attr_mode (insn))
11644 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11649 fatal_insn ("unknown insn mode", insn);
11655 /* Compute default value for "length_address" attribute. */
11657 ix86_attr_length_address_default (rtx insn)
11661 if (get_attr_type (insn) == TYPE_LEA)
11663 rtx set = PATTERN (insn);
11664 if (GET_CODE (set) == SET)
11666 else if (GET_CODE (set) == PARALLEL
11667 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11668 set = XVECEXP (set, 0, 0);
11671 #ifdef ENABLE_CHECKING
11677 return memory_address_length (SET_SRC (set));
11680 extract_insn_cached (insn);
11681 for (i = recog_data.n_operands - 1; i >= 0; --i)
11682 if (GET_CODE (recog_data.operand[i]) == MEM)
11684 return memory_address_length (XEXP (recog_data.operand[i], 0));
11690 /* Return the maximum number of instructions a cpu can issue. */
11693 ix86_issue_rate (void)
11697 case PROCESSOR_PENTIUM:
11701 case PROCESSOR_PENTIUMPRO:
11702 case PROCESSOR_PENTIUM4:
11703 case PROCESSOR_ATHLON:
11712 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11713 by DEP_INSN and nothing set by DEP_INSN. */
11716 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11720 /* Simplify the test for uninteresting insns. */
11721 if (insn_type != TYPE_SETCC
11722 && insn_type != TYPE_ICMOV
11723 && insn_type != TYPE_FCMOV
11724 && insn_type != TYPE_IBR)
11727 if ((set = single_set (dep_insn)) != 0)
11729 set = SET_DEST (set);
11732 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11733 && XVECLEN (PATTERN (dep_insn), 0) == 2
11734 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11735 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11737 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11738 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11743 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11746 /* This test is true if the dependent insn reads the flags but
11747 not any other potentially set register. */
11748 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11751 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11757 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11758 address with operands set by DEP_INSN. */
11761 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11765 if (insn_type == TYPE_LEA
11768 addr = PATTERN (insn);
11769 if (GET_CODE (addr) == SET)
11771 else if (GET_CODE (addr) == PARALLEL
11772 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11773 addr = XVECEXP (addr, 0, 0);
11776 addr = SET_SRC (addr);
11781 extract_insn_cached (insn);
11782 for (i = recog_data.n_operands - 1; i >= 0; --i)
11783 if (GET_CODE (recog_data.operand[i]) == MEM)
11785 addr = XEXP (recog_data.operand[i], 0);
11792 return modified_in_p (addr, dep_insn);
11796 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11798 enum attr_type insn_type, dep_insn_type;
11799 enum attr_memory memory, dep_memory;
11801 int dep_insn_code_number;
11803 /* Anti and output dependencies have zero cost on all CPUs. */
11804 if (REG_NOTE_KIND (link) != 0)
11807 dep_insn_code_number = recog_memoized (dep_insn);
11809 /* If we can't recognize the insns, we can't really do anything. */
11810 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11813 insn_type = get_attr_type (insn);
11814 dep_insn_type = get_attr_type (dep_insn);
11818 case PROCESSOR_PENTIUM:
11819 /* Address Generation Interlock adds a cycle of latency. */
11820 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11823 /* ??? Compares pair with jump/setcc. */
11824 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11827 /* Floating point stores require value to be ready one cycle earlier. */
11828 if (insn_type == TYPE_FMOV
11829 && get_attr_memory (insn) == MEMORY_STORE
11830 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11834 case PROCESSOR_PENTIUMPRO:
11835 memory = get_attr_memory (insn);
11836 dep_memory = get_attr_memory (dep_insn);
11838 /* Since we can't represent delayed latencies of load+operation,
11839 increase the cost here for non-imov insns. */
11840 if (dep_insn_type != TYPE_IMOV
11841 && dep_insn_type != TYPE_FMOV
11842 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11845 /* INT->FP conversion is expensive. */
11846 if (get_attr_fp_int_src (dep_insn))
11849 /* There is one cycle extra latency between an FP op and a store. */
11850 if (insn_type == TYPE_FMOV
11851 && (set = single_set (dep_insn)) != NULL_RTX
11852 && (set2 = single_set (insn)) != NULL_RTX
11853 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11854 && GET_CODE (SET_DEST (set2)) == MEM)
11857 /* Show ability of reorder buffer to hide latency of load by executing
11858 in parallel with previous instruction in case
11859 previous instruction is not needed to compute the address. */
11860 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11861 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11863 /* Claim moves to take one cycle, as core can issue one load
11864 at time and the next load can start cycle later. */
11865 if (dep_insn_type == TYPE_IMOV
11866 || dep_insn_type == TYPE_FMOV)
11874 memory = get_attr_memory (insn);
11875 dep_memory = get_attr_memory (dep_insn);
11876 /* The esp dependency is resolved before the instruction is really
11878 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11879 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11882 /* Since we can't represent delayed latencies of load+operation,
11883 increase the cost here for non-imov insns. */
11884 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11885 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11887 /* INT->FP conversion is expensive. */
11888 if (get_attr_fp_int_src (dep_insn))
11891 /* Show ability of reorder buffer to hide latency of load by executing
11892 in parallel with previous instruction in case
11893 previous instruction is not needed to compute the address. */
11894 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11895 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11897 /* Claim moves to take one cycle, as core can issue one load
11898 at time and the next load can start cycle later. */
11899 if (dep_insn_type == TYPE_IMOV
11900 || dep_insn_type == TYPE_FMOV)
11909 case PROCESSOR_ATHLON:
11911 memory = get_attr_memory (insn);
11912 dep_memory = get_attr_memory (dep_insn);
11914 /* Show ability of reorder buffer to hide latency of load by executing
11915 in parallel with previous instruction in case
11916 previous instruction is not needed to compute the address. */
11917 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11918 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11920 enum attr_unit unit = get_attr_unit (insn);
11923 /* Because of the difference between the length of integer and
11924 floating unit pipeline preparation stages, the memory operands
11925 for floating point are cheaper.
11927 ??? For Athlon it the difference is most probably 2. */
11928 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11931 loadcost = TARGET_ATHLON ? 2 : 0;
11933 if (cost >= loadcost)
11948 struct ppro_sched_data
11951 int issued_this_cycle;
11955 static enum attr_ppro_uops
11956 ix86_safe_ppro_uops (rtx insn)
11958 if (recog_memoized (insn) >= 0)
11959 return get_attr_ppro_uops (insn);
11961 return PPRO_UOPS_MANY;
11965 ix86_dump_ppro_packet (FILE *dump)
11967 if (ix86_sched_data.ppro.decode[0])
11969 fprintf (dump, "PPRO packet: %d",
11970 INSN_UID (ix86_sched_data.ppro.decode[0]));
11971 if (ix86_sched_data.ppro.decode[1])
11972 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11973 if (ix86_sched_data.ppro.decode[2])
11974 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11975 fputc ('\n', dump);
11979 /* We're beginning a new block. Initialize data structures as necessary. */
11982 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
11983 int sched_verbose ATTRIBUTE_UNUSED,
11984 int veclen ATTRIBUTE_UNUSED)
11986 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11989 /* Shift INSN to SLOT, and shift everything else down. */
11992 ix86_reorder_insn (rtx *insnp, rtx *slot)
11998 insnp[0] = insnp[1];
11999 while (++insnp != slot);
12005 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12008 enum attr_ppro_uops cur_uops;
12009 int issued_this_cycle;
12013 /* At this point .ppro.decode contains the state of the three
12014 decoders from last "cycle". That is, those insns that were
12015 actually independent. But here we're scheduling for the
12016 decoder, and we may find things that are decodable in the
12019 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12020 issued_this_cycle = 0;
12023 cur_uops = ix86_safe_ppro_uops (*insnp);
12025 /* If the decoders are empty, and we've a complex insn at the
12026 head of the priority queue, let it issue without complaint. */
12027 if (decode[0] == NULL)
12029 if (cur_uops == PPRO_UOPS_MANY)
12031 decode[0] = *insnp;
12035 /* Otherwise, search for a 2-4 uop unsn to issue. */
12036 while (cur_uops != PPRO_UOPS_FEW)
12038 if (insnp == ready)
12040 cur_uops = ix86_safe_ppro_uops (*--insnp);
12043 /* If so, move it to the head of the line. */
12044 if (cur_uops == PPRO_UOPS_FEW)
12045 ix86_reorder_insn (insnp, e_ready);
12047 /* Issue the head of the queue. */
12048 issued_this_cycle = 1;
12049 decode[0] = *e_ready--;
12052 /* Look for simple insns to fill in the other two slots. */
12053 for (i = 1; i < 3; ++i)
12054 if (decode[i] == NULL)
12056 if (ready > e_ready)
12060 cur_uops = ix86_safe_ppro_uops (*insnp);
12061 while (cur_uops != PPRO_UOPS_ONE)
12063 if (insnp == ready)
12065 cur_uops = ix86_safe_ppro_uops (*--insnp);
12068 /* Found one. Move it to the head of the queue and issue it. */
12069 if (cur_uops == PPRO_UOPS_ONE)
12071 ix86_reorder_insn (insnp, e_ready);
12072 decode[i] = *e_ready--;
12073 issued_this_cycle++;
12077 /* ??? Didn't find one. Ideally, here we would do a lazy split
12078 of 2-uop insns, issue one and queue the other. */
12082 if (issued_this_cycle == 0)
12083 issued_this_cycle = 1;
12084 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12087 /* We are about to being issuing insns for this clock cycle.
12088 Override the default sort algorithm to better slot instructions. */
12090 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12091 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12092 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12094 int n_ready = *n_readyp;
12095 rtx *e_ready = ready + n_ready - 1;
12097 /* Make sure to go ahead and initialize key items in
12098 ix86_sched_data if we are not going to bother trying to
12099 reorder the ready queue. */
12102 ix86_sched_data.ppro.issued_this_cycle = 1;
12111 case PROCESSOR_PENTIUMPRO:
12112 ix86_sched_reorder_ppro (ready, e_ready);
12117 return ix86_issue_rate ();
12120 /* We are about to issue INSN. Return the number of insns left on the
12121 ready queue that can be issued this cycle. */
12124 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12125 int can_issue_more)
12131 return can_issue_more - 1;
12133 case PROCESSOR_PENTIUMPRO:
12135 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12137 if (uops == PPRO_UOPS_MANY)
12140 ix86_dump_ppro_packet (dump);
12141 ix86_sched_data.ppro.decode[0] = insn;
12142 ix86_sched_data.ppro.decode[1] = NULL;
12143 ix86_sched_data.ppro.decode[2] = NULL;
12145 ix86_dump_ppro_packet (dump);
12146 ix86_sched_data.ppro.decode[0] = NULL;
12148 else if (uops == PPRO_UOPS_FEW)
12151 ix86_dump_ppro_packet (dump);
12152 ix86_sched_data.ppro.decode[0] = insn;
12153 ix86_sched_data.ppro.decode[1] = NULL;
12154 ix86_sched_data.ppro.decode[2] = NULL;
12158 for (i = 0; i < 3; ++i)
12159 if (ix86_sched_data.ppro.decode[i] == NULL)
12161 ix86_sched_data.ppro.decode[i] = insn;
12169 ix86_dump_ppro_packet (dump);
12170 ix86_sched_data.ppro.decode[0] = NULL;
12171 ix86_sched_data.ppro.decode[1] = NULL;
12172 ix86_sched_data.ppro.decode[2] = NULL;
12176 return --ix86_sched_data.ppro.issued_this_cycle;
12181 ia32_use_dfa_pipeline_interface (void)
12183 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12188 /* How many alternative schedules to try. This should be as wide as the
12189 scheduling freedom in the DFA, but no wider. Making this value too
12190 large results extra work for the scheduler. */
12193 ia32_multipass_dfa_lookahead (void)
12195 if (ix86_tune == PROCESSOR_PENTIUM)
12202 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12203 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12207 ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
12212 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12214 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12218 /* Subroutine of above to actually do the updating by recursively walking
12222 ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
12225 enum rtx_code code = GET_CODE (x);
12226 const char *format_ptr = GET_RTX_FORMAT (code);
12229 if (code == MEM && XEXP (x, 0) == dstreg)
12230 MEM_COPY_ATTRIBUTES (x, dstref);
12231 else if (code == MEM && XEXP (x, 0) == srcreg)
12232 MEM_COPY_ATTRIBUTES (x, srcref);
12234 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12236 if (*format_ptr == 'e')
12237 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12239 else if (*format_ptr == 'E')
12240 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12241 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12246 /* Compute the alignment given to a constant that is being placed in memory.
12247 EXP is the constant and ALIGN is the alignment that the object would
12249 The value of this function is used instead of that alignment to align
12253 ix86_constant_alignment (tree exp, int align)
12255 if (TREE_CODE (exp) == REAL_CST)
12257 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12259 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12262 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12269 /* Compute the alignment for a static variable.
12270 TYPE is the data type, and ALIGN is the alignment that
12271 the object would ordinarily have. The value of this function is used
12272 instead of that alignment to align the object. */
12275 ix86_data_alignment (tree type, int align)
12277 if (AGGREGATE_TYPE_P (type)
12278 && TYPE_SIZE (type)
12279 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12280 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12281 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12284 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12285 to 16byte boundary. */
12288 if (AGGREGATE_TYPE_P (type)
12289 && TYPE_SIZE (type)
12290 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12291 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12292 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12296 if (TREE_CODE (type) == ARRAY_TYPE)
12298 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12300 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12303 else if (TREE_CODE (type) == COMPLEX_TYPE)
12306 if (TYPE_MODE (type) == DCmode && align < 64)
12308 if (TYPE_MODE (type) == XCmode && align < 128)
12311 else if ((TREE_CODE (type) == RECORD_TYPE
12312 || TREE_CODE (type) == UNION_TYPE
12313 || TREE_CODE (type) == QUAL_UNION_TYPE)
12314 && TYPE_FIELDS (type))
12316 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12318 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12321 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12322 || TREE_CODE (type) == INTEGER_TYPE)
12324 if (TYPE_MODE (type) == DFmode && align < 64)
12326 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12333 /* Compute the alignment for a local variable.
12334 TYPE is the data type, and ALIGN is the alignment that
12335 the object would ordinarily have. The value of this macro is used
12336 instead of that alignment to align the object. */
12339 ix86_local_alignment (tree type, int align)
12341 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12342 to 16byte boundary. */
12345 if (AGGREGATE_TYPE_P (type)
12346 && TYPE_SIZE (type)
12347 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12348 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12349 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12352 if (TREE_CODE (type) == ARRAY_TYPE)
12354 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12356 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12359 else if (TREE_CODE (type) == COMPLEX_TYPE)
12361 if (TYPE_MODE (type) == DCmode && align < 64)
12363 if (TYPE_MODE (type) == XCmode && align < 128)
12366 else if ((TREE_CODE (type) == RECORD_TYPE
12367 || TREE_CODE (type) == UNION_TYPE
12368 || TREE_CODE (type) == QUAL_UNION_TYPE)
12369 && TYPE_FIELDS (type))
12371 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12373 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12376 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12377 || TREE_CODE (type) == INTEGER_TYPE)
12380 if (TYPE_MODE (type) == DFmode && align < 64)
12382 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12388 /* Emit RTL insns to initialize the variable parts of a trampoline.
12389 FNADDR is an RTX for the address of the function's pure code.
12390 CXT is an RTX for the static chain value for the function. */
12392 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12396 /* Compute offset from the end of the jmp to the target function. */
12397 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12398 plus_constant (tramp, 10),
12399 NULL_RTX, 1, OPTAB_DIRECT);
12400 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12401 gen_int_mode (0xb9, QImode));
12402 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12403 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12404 gen_int_mode (0xe9, QImode));
12405 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12410 /* Try to load address using shorter movl instead of movabs.
12411 We may want to support movq for kernel mode, but kernel does not use
12412 trampolines at the moment. */
12413 if (x86_64_zero_extended_value (fnaddr))
12415 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12416 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12417 gen_int_mode (0xbb41, HImode));
12418 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12419 gen_lowpart (SImode, fnaddr));
12424 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12425 gen_int_mode (0xbb49, HImode));
12426 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12430 /* Load static chain using movabs to r10. */
12431 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12432 gen_int_mode (0xba49, HImode));
12433 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12436 /* Jump to the r11 */
12437 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12438 gen_int_mode (0xff49, HImode));
12439 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12440 gen_int_mode (0xe3, QImode));
12442 if (offset > TRAMPOLINE_SIZE)
12446 #ifdef TRANSFER_FROM_TRAMPOLINE
12447 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12448 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12452 #define def_builtin(MASK, NAME, TYPE, CODE) \
12454 if ((MASK) & target_flags \
12455 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12456 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12457 NULL, NULL_TREE); \
12460 struct builtin_description
12462 const unsigned int mask;
12463 const enum insn_code icode;
12464 const char *const name;
12465 const enum ix86_builtins code;
12466 const enum rtx_code comparison;
12467 const unsigned int flag;
12470 static const struct builtin_description bdesc_comi[] =
12472 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12473 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12474 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12475 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12476 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12477 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12478 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12479 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12480 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12481 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12482 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12483 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12484 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12485 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12486 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12487 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12488 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12489 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12490 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12491 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12492 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12493 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12494 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12495 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12498 static const struct builtin_description bdesc_2arg[] =
12501 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12502 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12503 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12504 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12505 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12506 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12507 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12508 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12510 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12511 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12512 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12513 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12514 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12515 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12516 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12517 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12518 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12519 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12520 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12521 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12522 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12523 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12524 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12525 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12526 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12527 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12528 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12529 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12531 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12532 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12533 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12534 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12536 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12537 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12538 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12539 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12541 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12542 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12543 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12544 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12545 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12548 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12549 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12550 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12551 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12552 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12553 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12554 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12555 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12557 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12558 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12559 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12560 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12561 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12562 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12563 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12564 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12566 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12567 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12568 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12570 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12571 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12572 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12573 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12575 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12576 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12578 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12579 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12580 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12581 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12582 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12583 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12585 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12586 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12587 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12588 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12590 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12591 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12592 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12593 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12594 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12595 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12598 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12599 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12600 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12602 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12603 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12604 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12606 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12607 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12608 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12609 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12610 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12611 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12613 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12614 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12615 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12616 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12617 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12618 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12620 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12621 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12622 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12623 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12625 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12626 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12629 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12630 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12631 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12632 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12633 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12634 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12635 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12636 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12638 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12639 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12640 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12641 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12642 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12643 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12644 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12645 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12646 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12647 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12648 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12649 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12650 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12651 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12652 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12653 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12654 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12655 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12656 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12657 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12659 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12660 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12661 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12662 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12664 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12665 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12666 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12667 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12669 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12670 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12671 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12674 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12675 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12676 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12677 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12678 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12679 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12680 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12681 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12683 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12684 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12685 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12686 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12687 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12688 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12689 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12690 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12692 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12693 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12694 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12695 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12697 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12698 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12699 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12700 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12702 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12703 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12705 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12706 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12707 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12708 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12709 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12710 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12712 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12713 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12714 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12715 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12717 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12718 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12719 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12720 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12721 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12722 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12723 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12724 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12726 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12727 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12728 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12730 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12731 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12733 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12734 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12735 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12736 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12737 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12738 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12740 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12741 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12742 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12743 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12744 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12745 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12747 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12748 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12749 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12750 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12752 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12754 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12755 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12756 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12757 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12760 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12761 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12762 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12763 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12764 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12765 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12768 static const struct builtin_description bdesc_1arg[] =
12770 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12771 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12773 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12774 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12775 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12777 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12778 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12779 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12780 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12781 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12782 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12784 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12785 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12786 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12787 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12789 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12791 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12792 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12794 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12795 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12796 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12797 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12798 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12800 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12802 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12803 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12804 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12805 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12807 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12808 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12809 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12811 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12814 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12815 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12816 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12820 ix86_init_builtins (void)
12823 ix86_init_mmx_sse_builtins ();
12826 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12827 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12830 ix86_init_mmx_sse_builtins (void)
12832 const struct builtin_description * d;
12835 tree pchar_type_node = build_pointer_type (char_type_node);
12836 tree pcchar_type_node = build_pointer_type (
12837 build_type_variant (char_type_node, 1, 0));
12838 tree pfloat_type_node = build_pointer_type (float_type_node);
12839 tree pcfloat_type_node = build_pointer_type (
12840 build_type_variant (float_type_node, 1, 0));
12841 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12842 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12843 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12846 tree int_ftype_v4sf_v4sf
12847 = build_function_type_list (integer_type_node,
12848 V4SF_type_node, V4SF_type_node, NULL_TREE);
12849 tree v4si_ftype_v4sf_v4sf
12850 = build_function_type_list (V4SI_type_node,
12851 V4SF_type_node, V4SF_type_node, NULL_TREE);
12852 /* MMX/SSE/integer conversions. */
12853 tree int_ftype_v4sf
12854 = build_function_type_list (integer_type_node,
12855 V4SF_type_node, NULL_TREE);
12856 tree int64_ftype_v4sf
12857 = build_function_type_list (long_long_integer_type_node,
12858 V4SF_type_node, NULL_TREE);
12859 tree int_ftype_v8qi
12860 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12861 tree v4sf_ftype_v4sf_int
12862 = build_function_type_list (V4SF_type_node,
12863 V4SF_type_node, integer_type_node, NULL_TREE);
12864 tree v4sf_ftype_v4sf_int64
12865 = build_function_type_list (V4SF_type_node,
12866 V4SF_type_node, long_long_integer_type_node,
12868 tree v4sf_ftype_v4sf_v2si
12869 = build_function_type_list (V4SF_type_node,
12870 V4SF_type_node, V2SI_type_node, NULL_TREE);
12871 tree int_ftype_v4hi_int
12872 = build_function_type_list (integer_type_node,
12873 V4HI_type_node, integer_type_node, NULL_TREE);
12874 tree v4hi_ftype_v4hi_int_int
12875 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12876 integer_type_node, integer_type_node,
12878 /* Miscellaneous. */
12879 tree v8qi_ftype_v4hi_v4hi
12880 = build_function_type_list (V8QI_type_node,
12881 V4HI_type_node, V4HI_type_node, NULL_TREE);
12882 tree v4hi_ftype_v2si_v2si
12883 = build_function_type_list (V4HI_type_node,
12884 V2SI_type_node, V2SI_type_node, NULL_TREE);
12885 tree v4sf_ftype_v4sf_v4sf_int
12886 = build_function_type_list (V4SF_type_node,
12887 V4SF_type_node, V4SF_type_node,
12888 integer_type_node, NULL_TREE);
12889 tree v2si_ftype_v4hi_v4hi
12890 = build_function_type_list (V2SI_type_node,
12891 V4HI_type_node, V4HI_type_node, NULL_TREE);
12892 tree v4hi_ftype_v4hi_int
12893 = build_function_type_list (V4HI_type_node,
12894 V4HI_type_node, integer_type_node, NULL_TREE);
12895 tree v4hi_ftype_v4hi_di
12896 = build_function_type_list (V4HI_type_node,
12897 V4HI_type_node, long_long_unsigned_type_node,
12899 tree v2si_ftype_v2si_di
12900 = build_function_type_list (V2SI_type_node,
12901 V2SI_type_node, long_long_unsigned_type_node,
12903 tree void_ftype_void
12904 = build_function_type (void_type_node, void_list_node);
12905 tree void_ftype_unsigned
12906 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12907 tree void_ftype_unsigned_unsigned
12908 = build_function_type_list (void_type_node, unsigned_type_node,
12909 unsigned_type_node, NULL_TREE);
12910 tree void_ftype_pcvoid_unsigned_unsigned
12911 = build_function_type_list (void_type_node, const_ptr_type_node,
12912 unsigned_type_node, unsigned_type_node,
12914 tree unsigned_ftype_void
12915 = build_function_type (unsigned_type_node, void_list_node);
12917 = build_function_type (long_long_unsigned_type_node, void_list_node);
12918 tree v4sf_ftype_void
12919 = build_function_type (V4SF_type_node, void_list_node);
12920 tree v2si_ftype_v4sf
12921 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12922 /* Loads/stores. */
12923 tree void_ftype_v8qi_v8qi_pchar
12924 = build_function_type_list (void_type_node,
12925 V8QI_type_node, V8QI_type_node,
12926 pchar_type_node, NULL_TREE);
12927 tree v4sf_ftype_pcfloat
12928 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12929 /* @@@ the type is bogus */
12930 tree v4sf_ftype_v4sf_pv2si
12931 = build_function_type_list (V4SF_type_node,
12932 V4SF_type_node, pv2si_type_node, NULL_TREE);
12933 tree void_ftype_pv2si_v4sf
12934 = build_function_type_list (void_type_node,
12935 pv2si_type_node, V4SF_type_node, NULL_TREE);
12936 tree void_ftype_pfloat_v4sf
12937 = build_function_type_list (void_type_node,
12938 pfloat_type_node, V4SF_type_node, NULL_TREE);
12939 tree void_ftype_pdi_di
12940 = build_function_type_list (void_type_node,
12941 pdi_type_node, long_long_unsigned_type_node,
12943 tree void_ftype_pv2di_v2di
12944 = build_function_type_list (void_type_node,
12945 pv2di_type_node, V2DI_type_node, NULL_TREE);
12946 /* Normal vector unops. */
12947 tree v4sf_ftype_v4sf
12948 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12950 /* Normal vector binops. */
12951 tree v4sf_ftype_v4sf_v4sf
12952 = build_function_type_list (V4SF_type_node,
12953 V4SF_type_node, V4SF_type_node, NULL_TREE);
12954 tree v8qi_ftype_v8qi_v8qi
12955 = build_function_type_list (V8QI_type_node,
12956 V8QI_type_node, V8QI_type_node, NULL_TREE);
12957 tree v4hi_ftype_v4hi_v4hi
12958 = build_function_type_list (V4HI_type_node,
12959 V4HI_type_node, V4HI_type_node, NULL_TREE);
12960 tree v2si_ftype_v2si_v2si
12961 = build_function_type_list (V2SI_type_node,
12962 V2SI_type_node, V2SI_type_node, NULL_TREE);
12963 tree di_ftype_di_di
12964 = build_function_type_list (long_long_unsigned_type_node,
12965 long_long_unsigned_type_node,
12966 long_long_unsigned_type_node, NULL_TREE);
12968 tree v2si_ftype_v2sf
12969 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12970 tree v2sf_ftype_v2si
12971 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12972 tree v2si_ftype_v2si
12973 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12974 tree v2sf_ftype_v2sf
12975 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12976 tree v2sf_ftype_v2sf_v2sf
12977 = build_function_type_list (V2SF_type_node,
12978 V2SF_type_node, V2SF_type_node, NULL_TREE);
12979 tree v2si_ftype_v2sf_v2sf
12980 = build_function_type_list (V2SI_type_node,
12981 V2SF_type_node, V2SF_type_node, NULL_TREE);
12982 tree pint_type_node = build_pointer_type (integer_type_node);
12983 tree pcint_type_node = build_pointer_type (
12984 build_type_variant (integer_type_node, 1, 0));
12985 tree pdouble_type_node = build_pointer_type (double_type_node);
12986 tree pcdouble_type_node = build_pointer_type (
12987 build_type_variant (double_type_node, 1, 0));
12988 tree int_ftype_v2df_v2df
12989 = build_function_type_list (integer_type_node,
12990 V2DF_type_node, V2DF_type_node, NULL_TREE);
12993 = build_function_type (intTI_type_node, void_list_node);
12994 tree v2di_ftype_void
12995 = build_function_type (V2DI_type_node, void_list_node);
12996 tree ti_ftype_ti_ti
12997 = build_function_type_list (intTI_type_node,
12998 intTI_type_node, intTI_type_node, NULL_TREE);
12999 tree void_ftype_pcvoid
13000 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13002 = build_function_type_list (V2DI_type_node,
13003 long_long_unsigned_type_node, NULL_TREE);
13005 = build_function_type_list (long_long_unsigned_type_node,
13006 V2DI_type_node, NULL_TREE);
13007 tree v4sf_ftype_v4si
13008 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13009 tree v4si_ftype_v4sf
13010 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13011 tree v2df_ftype_v4si
13012 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13013 tree v4si_ftype_v2df
13014 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13015 tree v2si_ftype_v2df
13016 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13017 tree v4sf_ftype_v2df
13018 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13019 tree v2df_ftype_v2si
13020 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13021 tree v2df_ftype_v4sf
13022 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13023 tree int_ftype_v2df
13024 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13025 tree int64_ftype_v2df
13026 = build_function_type_list (long_long_integer_type_node,
13027 V2DF_type_node, NULL_TREE);
13028 tree v2df_ftype_v2df_int
13029 = build_function_type_list (V2DF_type_node,
13030 V2DF_type_node, integer_type_node, NULL_TREE);
13031 tree v2df_ftype_v2df_int64
13032 = build_function_type_list (V2DF_type_node,
13033 V2DF_type_node, long_long_integer_type_node,
13035 tree v4sf_ftype_v4sf_v2df
13036 = build_function_type_list (V4SF_type_node,
13037 V4SF_type_node, V2DF_type_node, NULL_TREE);
13038 tree v2df_ftype_v2df_v4sf
13039 = build_function_type_list (V2DF_type_node,
13040 V2DF_type_node, V4SF_type_node, NULL_TREE);
13041 tree v2df_ftype_v2df_v2df_int
13042 = build_function_type_list (V2DF_type_node,
13043 V2DF_type_node, V2DF_type_node,
13046 tree v2df_ftype_v2df_pv2si
13047 = build_function_type_list (V2DF_type_node,
13048 V2DF_type_node, pv2si_type_node, NULL_TREE);
13049 tree void_ftype_pv2si_v2df
13050 = build_function_type_list (void_type_node,
13051 pv2si_type_node, V2DF_type_node, NULL_TREE);
13052 tree void_ftype_pdouble_v2df
13053 = build_function_type_list (void_type_node,
13054 pdouble_type_node, V2DF_type_node, NULL_TREE);
13055 tree void_ftype_pint_int
13056 = build_function_type_list (void_type_node,
13057 pint_type_node, integer_type_node, NULL_TREE);
13058 tree void_ftype_v16qi_v16qi_pchar
13059 = build_function_type_list (void_type_node,
13060 V16QI_type_node, V16QI_type_node,
13061 pchar_type_node, NULL_TREE);
13062 tree v2df_ftype_pcdouble
13063 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13064 tree v2df_ftype_v2df_v2df
13065 = build_function_type_list (V2DF_type_node,
13066 V2DF_type_node, V2DF_type_node, NULL_TREE);
13067 tree v16qi_ftype_v16qi_v16qi
13068 = build_function_type_list (V16QI_type_node,
13069 V16QI_type_node, V16QI_type_node, NULL_TREE);
13070 tree v8hi_ftype_v8hi_v8hi
13071 = build_function_type_list (V8HI_type_node,
13072 V8HI_type_node, V8HI_type_node, NULL_TREE);
13073 tree v4si_ftype_v4si_v4si
13074 = build_function_type_list (V4SI_type_node,
13075 V4SI_type_node, V4SI_type_node, NULL_TREE);
13076 tree v2di_ftype_v2di_v2di
13077 = build_function_type_list (V2DI_type_node,
13078 V2DI_type_node, V2DI_type_node, NULL_TREE);
13079 tree v2di_ftype_v2df_v2df
13080 = build_function_type_list (V2DI_type_node,
13081 V2DF_type_node, V2DF_type_node, NULL_TREE);
13082 tree v2df_ftype_v2df
13083 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13084 tree v2df_ftype_double
13085 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13086 tree v2df_ftype_double_double
13087 = build_function_type_list (V2DF_type_node,
13088 double_type_node, double_type_node, NULL_TREE);
13089 tree int_ftype_v8hi_int
13090 = build_function_type_list (integer_type_node,
13091 V8HI_type_node, integer_type_node, NULL_TREE);
13092 tree v8hi_ftype_v8hi_int_int
13093 = build_function_type_list (V8HI_type_node,
13094 V8HI_type_node, integer_type_node,
13095 integer_type_node, NULL_TREE);
13096 tree v2di_ftype_v2di_int
13097 = build_function_type_list (V2DI_type_node,
13098 V2DI_type_node, integer_type_node, NULL_TREE);
13099 tree v4si_ftype_v4si_int
13100 = build_function_type_list (V4SI_type_node,
13101 V4SI_type_node, integer_type_node, NULL_TREE);
13102 tree v8hi_ftype_v8hi_int
13103 = build_function_type_list (V8HI_type_node,
13104 V8HI_type_node, integer_type_node, NULL_TREE);
13105 tree v8hi_ftype_v8hi_v2di
13106 = build_function_type_list (V8HI_type_node,
13107 V8HI_type_node, V2DI_type_node, NULL_TREE);
13108 tree v4si_ftype_v4si_v2di
13109 = build_function_type_list (V4SI_type_node,
13110 V4SI_type_node, V2DI_type_node, NULL_TREE);
13111 tree v4si_ftype_v8hi_v8hi
13112 = build_function_type_list (V4SI_type_node,
13113 V8HI_type_node, V8HI_type_node, NULL_TREE);
13114 tree di_ftype_v8qi_v8qi
13115 = build_function_type_list (long_long_unsigned_type_node,
13116 V8QI_type_node, V8QI_type_node, NULL_TREE);
13117 tree v2di_ftype_v16qi_v16qi
13118 = build_function_type_list (V2DI_type_node,
13119 V16QI_type_node, V16QI_type_node, NULL_TREE);
13120 tree int_ftype_v16qi
13121 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13122 tree v16qi_ftype_pcchar
13123 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13124 tree void_ftype_pchar_v16qi
13125 = build_function_type_list (void_type_node,
13126 pchar_type_node, V16QI_type_node, NULL_TREE);
13127 tree v4si_ftype_pcint
13128 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13129 tree void_ftype_pcint_v4si
13130 = build_function_type_list (void_type_node,
13131 pcint_type_node, V4SI_type_node, NULL_TREE);
13132 tree v2di_ftype_v2di
13133 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13135 /* Add all builtins that are more or less simple operations on two
13137 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13139 /* Use one of the operands; the target can have a different mode for
13140 mask-generating compares. */
13141 enum machine_mode mode;
13146 mode = insn_data[d->icode].operand[1].mode;
13151 type = v16qi_ftype_v16qi_v16qi;
13154 type = v8hi_ftype_v8hi_v8hi;
13157 type = v4si_ftype_v4si_v4si;
13160 type = v2di_ftype_v2di_v2di;
13163 type = v2df_ftype_v2df_v2df;
13166 type = ti_ftype_ti_ti;
13169 type = v4sf_ftype_v4sf_v4sf;
13172 type = v8qi_ftype_v8qi_v8qi;
13175 type = v4hi_ftype_v4hi_v4hi;
13178 type = v2si_ftype_v2si_v2si;
13181 type = di_ftype_di_di;
13188 /* Override for comparisons. */
13189 if (d->icode == CODE_FOR_maskcmpv4sf3
13190 || d->icode == CODE_FOR_maskncmpv4sf3
13191 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13192 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13193 type = v4si_ftype_v4sf_v4sf;
13195 if (d->icode == CODE_FOR_maskcmpv2df3
13196 || d->icode == CODE_FOR_maskncmpv2df3
13197 || d->icode == CODE_FOR_vmmaskcmpv2df3
13198 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13199 type = v2di_ftype_v2df_v2df;
13201 def_builtin (d->mask, d->name, type, d->code);
13204 /* Add the remaining MMX insns with somewhat more complicated types. */
13205 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13206 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13207 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13208 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13209 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13211 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13212 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13213 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13215 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13216 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13218 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13219 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13221 /* comi/ucomi insns. */
13222 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13223 if (d->mask == MASK_SSE2)
13224 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13226 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13228 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13229 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13230 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13232 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13233 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13234 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13235 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13236 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13237 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13238 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13239 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13240 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13241 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13242 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13244 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13245 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13247 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13249 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13250 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13251 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13252 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13253 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13254 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13256 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13257 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13258 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13259 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13261 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13262 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13263 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13264 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13266 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13268 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13270 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13271 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13272 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13273 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13274 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13275 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13277 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13279 /* Original 3DNow! */
13280 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13281 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13282 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13283 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13284 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13285 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13286 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13287 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13288 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13289 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13290 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13291 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13292 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13293 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13294 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13295 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13296 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13297 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13298 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13299 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13301 /* 3DNow! extension as used in the Athlon CPU. */
13302 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13303 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13304 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13305 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13306 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13307 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13309 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13312 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13313 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13315 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13316 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13317 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13319 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13320 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13321 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13322 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13323 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13324 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13326 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13327 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13328 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13329 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13331 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13332 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13333 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13334 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13335 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13337 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13338 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13339 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13340 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13342 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13343 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13345 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13347 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13348 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13350 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13351 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13352 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13353 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13354 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13356 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13358 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13359 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13360 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13361 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13363 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13364 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13365 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13367 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13368 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13369 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13370 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13372 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13373 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13374 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13375 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13376 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13377 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13378 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13380 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13381 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13382 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13384 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13385 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13386 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13387 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13388 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13389 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13390 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13392 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13394 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13395 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13396 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13398 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13399 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13400 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13402 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13403 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13405 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13406 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13407 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13408 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13410 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13411 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13412 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13413 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13415 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13416 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13418 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13420 /* Prescott New Instructions. */
13421 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13422 void_ftype_pcvoid_unsigned_unsigned,
13423 IX86_BUILTIN_MONITOR);
13424 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13425 void_ftype_unsigned_unsigned,
13426 IX86_BUILTIN_MWAIT);
13427 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13429 IX86_BUILTIN_MOVSHDUP);
13430 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13432 IX86_BUILTIN_MOVSLDUP);
13433 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13434 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13435 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13436 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13437 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13438 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13441 /* Errors in the source file can cause expand_expr to return const0_rtx
13442 where we expect a vector. To avoid crashing, use one of the vector
13443 clear instructions. */
13445 safe_vector_operand (rtx x, enum machine_mode mode)
13447 if (x != const0_rtx)
13449 x = gen_reg_rtx (mode);
13451 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13452 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13453 : gen_rtx_SUBREG (DImode, x, 0)));
13455 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13456 : gen_rtx_SUBREG (V4SFmode, x, 0),
13457 CONST0_RTX (V4SFmode)));
13461 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13464 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13467 tree arg0 = TREE_VALUE (arglist);
13468 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13469 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13470 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13471 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13472 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13473 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13475 if (VECTOR_MODE_P (mode0))
13476 op0 = safe_vector_operand (op0, mode0);
13477 if (VECTOR_MODE_P (mode1))
13478 op1 = safe_vector_operand (op1, mode1);
13481 || GET_MODE (target) != tmode
13482 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13483 target = gen_reg_rtx (tmode);
13485 if (GET_MODE (op1) == SImode && mode1 == TImode)
13487 rtx x = gen_reg_rtx (V4SImode);
13488 emit_insn (gen_sse2_loadd (x, op1));
13489 op1 = gen_lowpart (TImode, x);
13492 /* In case the insn wants input operands in modes different from
13493 the result, abort. */
13494 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13497 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13498 op0 = copy_to_mode_reg (mode0, op0);
13499 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13500 op1 = copy_to_mode_reg (mode1, op1);
13502 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13503 yet one of the two must not be a memory. This is normally enforced
13504 by expanders, but we didn't bother to create one here. */
13505 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13506 op0 = copy_to_mode_reg (mode0, op0);
13508 pat = GEN_FCN (icode) (target, op0, op1);
13515 /* Subroutine of ix86_expand_builtin to take care of stores. */
13518 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13521 tree arg0 = TREE_VALUE (arglist);
13522 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13523 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13524 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13525 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13526 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13528 if (VECTOR_MODE_P (mode1))
13529 op1 = safe_vector_operand (op1, mode1);
13531 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13532 op1 = copy_to_mode_reg (mode1, op1);
13534 pat = GEN_FCN (icode) (op0, op1);
13540 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13543 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13544 rtx target, int do_load)
13547 tree arg0 = TREE_VALUE (arglist);
13548 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13549 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13550 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13553 || GET_MODE (target) != tmode
13554 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13555 target = gen_reg_rtx (tmode);
13557 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13560 if (VECTOR_MODE_P (mode0))
13561 op0 = safe_vector_operand (op0, mode0);
13563 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13564 op0 = copy_to_mode_reg (mode0, op0);
13567 pat = GEN_FCN (icode) (target, op0);
13574 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13575 sqrtss, rsqrtss, rcpss. */
13578 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13581 tree arg0 = TREE_VALUE (arglist);
13582 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13583 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13584 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13587 || GET_MODE (target) != tmode
13588 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13589 target = gen_reg_rtx (tmode);
13591 if (VECTOR_MODE_P (mode0))
13592 op0 = safe_vector_operand (op0, mode0);
13594 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13595 op0 = copy_to_mode_reg (mode0, op0);
13598 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13599 op1 = copy_to_mode_reg (mode0, op1);
13601 pat = GEN_FCN (icode) (target, op0, op1);
13608 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13611 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13615 tree arg0 = TREE_VALUE (arglist);
13616 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13617 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13618 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13620 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13621 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13622 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13623 enum rtx_code comparison = d->comparison;
13625 if (VECTOR_MODE_P (mode0))
13626 op0 = safe_vector_operand (op0, mode0);
13627 if (VECTOR_MODE_P (mode1))
13628 op1 = safe_vector_operand (op1, mode1);
13630 /* Swap operands if we have a comparison that isn't available in
13634 rtx tmp = gen_reg_rtx (mode1);
13635 emit_move_insn (tmp, op1);
13641 || GET_MODE (target) != tmode
13642 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13643 target = gen_reg_rtx (tmode);
13645 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13646 op0 = copy_to_mode_reg (mode0, op0);
13647 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13648 op1 = copy_to_mode_reg (mode1, op1);
13650 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13651 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13658 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13661 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13665 tree arg0 = TREE_VALUE (arglist);
13666 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13667 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13668 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13670 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13671 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13672 enum rtx_code comparison = d->comparison;
13674 if (VECTOR_MODE_P (mode0))
13675 op0 = safe_vector_operand (op0, mode0);
13676 if (VECTOR_MODE_P (mode1))
13677 op1 = safe_vector_operand (op1, mode1);
13679 /* Swap operands if we have a comparison that isn't available in
13688 target = gen_reg_rtx (SImode);
13689 emit_move_insn (target, const0_rtx);
13690 target = gen_rtx_SUBREG (QImode, target, 0);
13692 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13693 op0 = copy_to_mode_reg (mode0, op0);
13694 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13695 op1 = copy_to_mode_reg (mode1, op1);
13697 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13698 pat = GEN_FCN (d->icode) (op0, op1);
13702 emit_insn (gen_rtx_SET (VOIDmode,
13703 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13704 gen_rtx_fmt_ee (comparison, QImode,
13708 return SUBREG_REG (target);
13711 /* Expand an expression EXP that calls a built-in function,
13712 with result going to TARGET if that's convenient
13713 (and in mode MODE if that's convenient).
13714 SUBTARGET may be used as the target for computing one of EXP's operands.
13715 IGNORE is nonzero if the value is to be ignored. */
13718 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13719 enum machine_mode mode ATTRIBUTE_UNUSED,
13720 int ignore ATTRIBUTE_UNUSED)
13722 const struct builtin_description *d;
13724 enum insn_code icode;
13725 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13726 tree arglist = TREE_OPERAND (exp, 1);
13727 tree arg0, arg1, arg2;
13728 rtx op0, op1, op2, pat;
13729 enum machine_mode tmode, mode0, mode1, mode2;
13730 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13734 case IX86_BUILTIN_EMMS:
13735 emit_insn (gen_emms ());
13738 case IX86_BUILTIN_SFENCE:
13739 emit_insn (gen_sfence ());
13742 case IX86_BUILTIN_PEXTRW:
13743 case IX86_BUILTIN_PEXTRW128:
13744 icode = (fcode == IX86_BUILTIN_PEXTRW
13745 ? CODE_FOR_mmx_pextrw
13746 : CODE_FOR_sse2_pextrw);
13747 arg0 = TREE_VALUE (arglist);
13748 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13749 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13750 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13751 tmode = insn_data[icode].operand[0].mode;
13752 mode0 = insn_data[icode].operand[1].mode;
13753 mode1 = insn_data[icode].operand[2].mode;
13755 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13756 op0 = copy_to_mode_reg (mode0, op0);
13757 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13759 /* @@@ better error message */
13760 error ("selector must be an immediate");
13761 return gen_reg_rtx (tmode);
13764 || GET_MODE (target) != tmode
13765 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13766 target = gen_reg_rtx (tmode);
13767 pat = GEN_FCN (icode) (target, op0, op1);
13773 case IX86_BUILTIN_PINSRW:
13774 case IX86_BUILTIN_PINSRW128:
13775 icode = (fcode == IX86_BUILTIN_PINSRW
13776 ? CODE_FOR_mmx_pinsrw
13777 : CODE_FOR_sse2_pinsrw);
13778 arg0 = TREE_VALUE (arglist);
13779 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13780 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13781 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13782 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13783 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13784 tmode = insn_data[icode].operand[0].mode;
13785 mode0 = insn_data[icode].operand[1].mode;
13786 mode1 = insn_data[icode].operand[2].mode;
13787 mode2 = insn_data[icode].operand[3].mode;
13789 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13790 op0 = copy_to_mode_reg (mode0, op0);
13791 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13792 op1 = copy_to_mode_reg (mode1, op1);
13793 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13795 /* @@@ better error message */
13796 error ("selector must be an immediate");
13800 || GET_MODE (target) != tmode
13801 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13802 target = gen_reg_rtx (tmode);
13803 pat = GEN_FCN (icode) (target, op0, op1, op2);
13809 case IX86_BUILTIN_MASKMOVQ:
13810 case IX86_BUILTIN_MASKMOVDQU:
13811 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13812 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13813 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13814 : CODE_FOR_sse2_maskmovdqu));
13815 /* Note the arg order is different from the operand order. */
13816 arg1 = TREE_VALUE (arglist);
13817 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13818 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13819 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13820 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13821 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13822 mode0 = insn_data[icode].operand[0].mode;
13823 mode1 = insn_data[icode].operand[1].mode;
13824 mode2 = insn_data[icode].operand[2].mode;
13826 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13827 op0 = copy_to_mode_reg (mode0, op0);
13828 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13829 op1 = copy_to_mode_reg (mode1, op1);
13830 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13831 op2 = copy_to_mode_reg (mode2, op2);
13832 pat = GEN_FCN (icode) (op0, op1, op2);
13838 case IX86_BUILTIN_SQRTSS:
13839 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13840 case IX86_BUILTIN_RSQRTSS:
13841 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13842 case IX86_BUILTIN_RCPSS:
13843 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13845 case IX86_BUILTIN_LOADAPS:
13846 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13848 case IX86_BUILTIN_LOADUPS:
13849 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13851 case IX86_BUILTIN_STOREAPS:
13852 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13854 case IX86_BUILTIN_STOREUPS:
13855 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13857 case IX86_BUILTIN_LOADSS:
13858 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13860 case IX86_BUILTIN_STORESS:
13861 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13863 case IX86_BUILTIN_LOADHPS:
13864 case IX86_BUILTIN_LOADLPS:
13865 case IX86_BUILTIN_LOADHPD:
13866 case IX86_BUILTIN_LOADLPD:
13867 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13868 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13869 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13870 : CODE_FOR_sse2_movlpd);
13871 arg0 = TREE_VALUE (arglist);
13872 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13873 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13874 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13875 tmode = insn_data[icode].operand[0].mode;
13876 mode0 = insn_data[icode].operand[1].mode;
13877 mode1 = insn_data[icode].operand[2].mode;
13879 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13880 op0 = copy_to_mode_reg (mode0, op0);
13881 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13883 || GET_MODE (target) != tmode
13884 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13885 target = gen_reg_rtx (tmode);
13886 pat = GEN_FCN (icode) (target, op0, op1);
13892 case IX86_BUILTIN_STOREHPS:
13893 case IX86_BUILTIN_STORELPS:
13894 case IX86_BUILTIN_STOREHPD:
13895 case IX86_BUILTIN_STORELPD:
13896 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13897 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13898 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13899 : CODE_FOR_sse2_movlpd);
13900 arg0 = TREE_VALUE (arglist);
13901 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13902 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13903 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13904 mode0 = insn_data[icode].operand[1].mode;
13905 mode1 = insn_data[icode].operand[2].mode;
13907 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13908 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13909 op1 = copy_to_mode_reg (mode1, op1);
13911 pat = GEN_FCN (icode) (op0, op0, op1);
13917 case IX86_BUILTIN_MOVNTPS:
13918 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13919 case IX86_BUILTIN_MOVNTQ:
13920 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13922 case IX86_BUILTIN_LDMXCSR:
13923 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13924 target = assign_386_stack_local (SImode, 0);
13925 emit_move_insn (target, op0);
13926 emit_insn (gen_ldmxcsr (target));
13929 case IX86_BUILTIN_STMXCSR:
13930 target = assign_386_stack_local (SImode, 0);
13931 emit_insn (gen_stmxcsr (target));
13932 return copy_to_mode_reg (SImode, target);
13934 case IX86_BUILTIN_SHUFPS:
13935 case IX86_BUILTIN_SHUFPD:
13936 icode = (fcode == IX86_BUILTIN_SHUFPS
13937 ? CODE_FOR_sse_shufps
13938 : CODE_FOR_sse2_shufpd);
13939 arg0 = TREE_VALUE (arglist);
13940 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13941 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13942 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13943 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13944 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13945 tmode = insn_data[icode].operand[0].mode;
13946 mode0 = insn_data[icode].operand[1].mode;
13947 mode1 = insn_data[icode].operand[2].mode;
13948 mode2 = insn_data[icode].operand[3].mode;
13950 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13951 op0 = copy_to_mode_reg (mode0, op0);
13952 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13953 op1 = copy_to_mode_reg (mode1, op1);
13954 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13956 /* @@@ better error message */
13957 error ("mask must be an immediate");
13958 return gen_reg_rtx (tmode);
13961 || GET_MODE (target) != tmode
13962 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13963 target = gen_reg_rtx (tmode);
13964 pat = GEN_FCN (icode) (target, op0, op1, op2);
13970 case IX86_BUILTIN_PSHUFW:
13971 case IX86_BUILTIN_PSHUFD:
13972 case IX86_BUILTIN_PSHUFHW:
13973 case IX86_BUILTIN_PSHUFLW:
13974 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13975 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13976 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13977 : CODE_FOR_mmx_pshufw);
13978 arg0 = TREE_VALUE (arglist);
13979 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13980 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13981 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13982 tmode = insn_data[icode].operand[0].mode;
13983 mode1 = insn_data[icode].operand[1].mode;
13984 mode2 = insn_data[icode].operand[2].mode;
13986 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13987 op0 = copy_to_mode_reg (mode1, op0);
13988 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13990 /* @@@ better error message */
13991 error ("mask must be an immediate");
13995 || GET_MODE (target) != tmode
13996 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13997 target = gen_reg_rtx (tmode);
13998 pat = GEN_FCN (icode) (target, op0, op1);
14004 case IX86_BUILTIN_PSLLDQI128:
14005 case IX86_BUILTIN_PSRLDQI128:
14006 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14007 : CODE_FOR_sse2_lshrti3);
14008 arg0 = TREE_VALUE (arglist);
14009 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14010 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14011 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14012 tmode = insn_data[icode].operand[0].mode;
14013 mode1 = insn_data[icode].operand[1].mode;
14014 mode2 = insn_data[icode].operand[2].mode;
14016 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14018 op0 = copy_to_reg (op0);
14019 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14021 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14023 error ("shift must be an immediate");
14026 target = gen_reg_rtx (V2DImode);
14027 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14033 case IX86_BUILTIN_FEMMS:
14034 emit_insn (gen_femms ());
14037 case IX86_BUILTIN_PAVGUSB:
14038 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14040 case IX86_BUILTIN_PF2ID:
14041 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14043 case IX86_BUILTIN_PFACC:
14044 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14046 case IX86_BUILTIN_PFADD:
14047 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14049 case IX86_BUILTIN_PFCMPEQ:
14050 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14052 case IX86_BUILTIN_PFCMPGE:
14053 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14055 case IX86_BUILTIN_PFCMPGT:
14056 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14058 case IX86_BUILTIN_PFMAX:
14059 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14061 case IX86_BUILTIN_PFMIN:
14062 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14064 case IX86_BUILTIN_PFMUL:
14065 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14067 case IX86_BUILTIN_PFRCP:
14068 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14070 case IX86_BUILTIN_PFRCPIT1:
14071 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14073 case IX86_BUILTIN_PFRCPIT2:
14074 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14076 case IX86_BUILTIN_PFRSQIT1:
14077 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14079 case IX86_BUILTIN_PFRSQRT:
14080 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14082 case IX86_BUILTIN_PFSUB:
14083 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14085 case IX86_BUILTIN_PFSUBR:
14086 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14088 case IX86_BUILTIN_PI2FD:
14089 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14091 case IX86_BUILTIN_PMULHRW:
14092 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14094 case IX86_BUILTIN_PF2IW:
14095 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14097 case IX86_BUILTIN_PFNACC:
14098 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14100 case IX86_BUILTIN_PFPNACC:
14101 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14103 case IX86_BUILTIN_PI2FW:
14104 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14106 case IX86_BUILTIN_PSWAPDSI:
14107 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14109 case IX86_BUILTIN_PSWAPDSF:
14110 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14112 case IX86_BUILTIN_SSE_ZERO:
14113 target = gen_reg_rtx (V4SFmode);
14114 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14117 case IX86_BUILTIN_MMX_ZERO:
14118 target = gen_reg_rtx (DImode);
14119 emit_insn (gen_mmx_clrdi (target));
14122 case IX86_BUILTIN_CLRTI:
14123 target = gen_reg_rtx (V2DImode);
14124 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14128 case IX86_BUILTIN_SQRTSD:
14129 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14130 case IX86_BUILTIN_LOADAPD:
14131 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14132 case IX86_BUILTIN_LOADUPD:
14133 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14135 case IX86_BUILTIN_STOREAPD:
14136 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14137 case IX86_BUILTIN_STOREUPD:
14138 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14140 case IX86_BUILTIN_LOADSD:
14141 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14143 case IX86_BUILTIN_STORESD:
14144 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14146 case IX86_BUILTIN_SETPD1:
14147 target = assign_386_stack_local (DFmode, 0);
14148 arg0 = TREE_VALUE (arglist);
14149 emit_move_insn (adjust_address (target, DFmode, 0),
14150 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14151 op0 = gen_reg_rtx (V2DFmode);
14152 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14153 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14156 case IX86_BUILTIN_SETPD:
14157 target = assign_386_stack_local (V2DFmode, 0);
14158 arg0 = TREE_VALUE (arglist);
14159 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14160 emit_move_insn (adjust_address (target, DFmode, 0),
14161 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14162 emit_move_insn (adjust_address (target, DFmode, 8),
14163 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14164 op0 = gen_reg_rtx (V2DFmode);
14165 emit_insn (gen_sse2_movapd (op0, target));
14168 case IX86_BUILTIN_LOADRPD:
14169 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14170 gen_reg_rtx (V2DFmode), 1);
14171 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14174 case IX86_BUILTIN_LOADPD1:
14175 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14176 gen_reg_rtx (V2DFmode), 1);
14177 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14180 case IX86_BUILTIN_STOREPD1:
14181 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14182 case IX86_BUILTIN_STORERPD:
14183 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14185 case IX86_BUILTIN_CLRPD:
14186 target = gen_reg_rtx (V2DFmode);
14187 emit_insn (gen_sse_clrv2df (target));
14190 case IX86_BUILTIN_MFENCE:
14191 emit_insn (gen_sse2_mfence ());
14193 case IX86_BUILTIN_LFENCE:
14194 emit_insn (gen_sse2_lfence ());
14197 case IX86_BUILTIN_CLFLUSH:
14198 arg0 = TREE_VALUE (arglist);
14199 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14200 icode = CODE_FOR_sse2_clflush;
14201 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14202 op0 = copy_to_mode_reg (Pmode, op0);
14204 emit_insn (gen_sse2_clflush (op0));
14207 case IX86_BUILTIN_MOVNTPD:
14208 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14209 case IX86_BUILTIN_MOVNTDQ:
14210 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14211 case IX86_BUILTIN_MOVNTI:
14212 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14214 case IX86_BUILTIN_LOADDQA:
14215 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14216 case IX86_BUILTIN_LOADDQU:
14217 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14218 case IX86_BUILTIN_LOADD:
14219 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14221 case IX86_BUILTIN_STOREDQA:
14222 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14223 case IX86_BUILTIN_STOREDQU:
14224 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14225 case IX86_BUILTIN_STORED:
14226 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14228 case IX86_BUILTIN_MONITOR:
14229 arg0 = TREE_VALUE (arglist);
14230 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14231 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14232 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14233 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14234 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14236 op0 = copy_to_mode_reg (SImode, op0);
14238 op1 = copy_to_mode_reg (SImode, op1);
14240 op2 = copy_to_mode_reg (SImode, op2);
14241 emit_insn (gen_monitor (op0, op1, op2));
14244 case IX86_BUILTIN_MWAIT:
14245 arg0 = TREE_VALUE (arglist);
14246 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14247 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14248 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14250 op0 = copy_to_mode_reg (SImode, op0);
14252 op1 = copy_to_mode_reg (SImode, op1);
14253 emit_insn (gen_mwait (op0, op1));
14256 case IX86_BUILTIN_LOADDDUP:
14257 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14259 case IX86_BUILTIN_LDDQU:
14260 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14267 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14268 if (d->code == fcode)
14270 /* Compares are treated specially. */
14271 if (d->icode == CODE_FOR_maskcmpv4sf3
14272 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14273 || d->icode == CODE_FOR_maskncmpv4sf3
14274 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14275 || d->icode == CODE_FOR_maskcmpv2df3
14276 || d->icode == CODE_FOR_vmmaskcmpv2df3
14277 || d->icode == CODE_FOR_maskncmpv2df3
14278 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14279 return ix86_expand_sse_compare (d, arglist, target);
14281 return ix86_expand_binop_builtin (d->icode, arglist, target);
14284 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14285 if (d->code == fcode)
14286 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14288 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14289 if (d->code == fcode)
14290 return ix86_expand_sse_comi (d, arglist, target);
14292 /* @@@ Should really do something sensible here. */
14296 /* Store OPERAND to the memory after reload is completed. This means
14297 that we can't easily use assign_stack_local. */
14299 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14302 if (!reload_completed)
14304 if (TARGET_RED_ZONE)
14306 result = gen_rtx_MEM (mode,
14307 gen_rtx_PLUS (Pmode,
14309 GEN_INT (-RED_ZONE_SIZE)));
14310 emit_move_insn (result, operand);
14312 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14318 operand = gen_lowpart (DImode, operand);
14322 gen_rtx_SET (VOIDmode,
14323 gen_rtx_MEM (DImode,
14324 gen_rtx_PRE_DEC (DImode,
14325 stack_pointer_rtx)),
14331 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14340 split_di (&operand, 1, operands, operands + 1);
14342 gen_rtx_SET (VOIDmode,
14343 gen_rtx_MEM (SImode,
14344 gen_rtx_PRE_DEC (Pmode,
14345 stack_pointer_rtx)),
14348 gen_rtx_SET (VOIDmode,
14349 gen_rtx_MEM (SImode,
14350 gen_rtx_PRE_DEC (Pmode,
14351 stack_pointer_rtx)),
14356 /* It is better to store HImodes as SImodes. */
14357 if (!TARGET_PARTIAL_REG_STALL)
14358 operand = gen_lowpart (SImode, operand);
14362 gen_rtx_SET (VOIDmode,
14363 gen_rtx_MEM (GET_MODE (operand),
14364 gen_rtx_PRE_DEC (SImode,
14365 stack_pointer_rtx)),
14371 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14376 /* Free operand from the memory. */
14378 ix86_free_from_memory (enum machine_mode mode)
14380 if (!TARGET_RED_ZONE)
14384 if (mode == DImode || TARGET_64BIT)
14386 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14390 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14391 to pop or add instruction if registers are available. */
14392 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14393 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14398 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14399 QImode must go into class Q_REGS.
14400 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14401 movdf to do mem-to-mem moves through integer regs. */
14403 ix86_preferred_reload_class (rtx x, enum reg_class class)
14405 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14407 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14409 /* SSE can't load any constant directly yet. */
14410 if (SSE_CLASS_P (class))
14412 /* Floats can load 0 and 1. */
14413 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14415 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14416 if (MAYBE_SSE_CLASS_P (class))
14417 return (reg_class_subset_p (class, GENERAL_REGS)
14418 ? GENERAL_REGS : FLOAT_REGS);
14422 /* General regs can load everything. */
14423 if (reg_class_subset_p (class, GENERAL_REGS))
14424 return GENERAL_REGS;
14425 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14426 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14429 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14431 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14436 /* If we are copying between general and FP registers, we need a memory
14437 location. The same is true for SSE and MMX registers.
14439 The macro can't work reliably when one of the CLASSES is class containing
14440 registers from multiple units (SSE, MMX, integer). We avoid this by never
14441 combining those units in single alternative in the machine description.
14442 Ensure that this constraint holds to avoid unexpected surprises.
14444 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14445 enforce these sanity checks. */
14447 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14448 enum machine_mode mode, int strict)
14450 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14451 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14452 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14453 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14454 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14455 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14462 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14463 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14464 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14465 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14466 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14468 /* Return the cost of moving data from a register in class CLASS1 to
14469 one in class CLASS2.
14471 It is not required that the cost always equal 2 when FROM is the same as TO;
14472 on some machines it is expensive to move between registers if they are not
14473 general registers. */
14475 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14476 enum reg_class class2)
14478 /* In case we require secondary memory, compute cost of the store followed
14479 by load. In order to avoid bad register allocation choices, we need
14480 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14482 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14486 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14487 MEMORY_MOVE_COST (mode, class1, 1));
14488 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14489 MEMORY_MOVE_COST (mode, class2, 1));
14491 /* In case of copying from general_purpose_register we may emit multiple
14492 stores followed by single load causing memory size mismatch stall.
14493 Count this as arbitrarily high cost of 20. */
14494 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14497 /* In the case of FP/MMX moves, the registers actually overlap, and we
14498 have to switch modes in order to treat them differently. */
14499 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14500 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14506 /* Moves between SSE/MMX and integer unit are expensive. */
14507 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14508 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14509 return ix86_cost->mmxsse_to_integer;
14510 if (MAYBE_FLOAT_CLASS_P (class1))
14511 return ix86_cost->fp_move;
14512 if (MAYBE_SSE_CLASS_P (class1))
14513 return ix86_cost->sse_move;
14514 if (MAYBE_MMX_CLASS_P (class1))
14515 return ix86_cost->mmx_move;
14519 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14521 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14523 /* Flags and only flags can only hold CCmode values. */
14524 if (CC_REGNO_P (regno))
14525 return GET_MODE_CLASS (mode) == MODE_CC;
14526 if (GET_MODE_CLASS (mode) == MODE_CC
14527 || GET_MODE_CLASS (mode) == MODE_RANDOM
14528 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14530 if (FP_REGNO_P (regno))
14531 return VALID_FP_MODE_P (mode);
14532 if (SSE_REGNO_P (regno))
14533 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14534 if (MMX_REGNO_P (regno))
14536 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14537 /* We handle both integer and floats in the general purpose registers.
14538 In future we should be able to handle vector modes as well. */
14539 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14541 /* Take care for QImode values - they can be in non-QI regs, but then
14542 they do cause partial register stalls. */
14543 if (regno < 4 || mode != QImode || TARGET_64BIT)
14545 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14548 /* Return the cost of moving data of mode M between a
14549 register and memory. A value of 2 is the default; this cost is
14550 relative to those in `REGISTER_MOVE_COST'.
14552 If moving between registers and memory is more expensive than
14553 between two registers, you should define this macro to express the
14556 Model also increased moving costs of QImode registers in non
14560 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14562 if (FLOAT_CLASS_P (class))
14580 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14582 if (SSE_CLASS_P (class))
14585 switch (GET_MODE_SIZE (mode))
14599 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14601 if (MMX_CLASS_P (class))
14604 switch (GET_MODE_SIZE (mode))
14615 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14617 switch (GET_MODE_SIZE (mode))
14621 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14622 : ix86_cost->movzbl_load);
14624 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14625 : ix86_cost->int_store[0] + 4);
14628 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14630 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14631 if (mode == TFmode)
14633 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14634 * (((int) GET_MODE_SIZE (mode)
14635 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14639 /* Compute a (partial) cost for rtx X. Return true if the complete
14640 cost has been computed, and false if subexpressions should be
14641 scanned. In either case, *TOTAL contains the cost result. */
14644 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14646 enum machine_mode mode = GET_MODE (x);
14654 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14656 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14658 else if (flag_pic && SYMBOLIC_CONST (x)
14660 || (!GET_CODE (x) != LABEL_REF
14661 && (GET_CODE (x) != SYMBOL_REF
14662 || !SYMBOL_REF_LOCAL_P (x)))))
14669 if (mode == VOIDmode)
14672 switch (standard_80387_constant_p (x))
14677 default: /* Other constants */
14682 /* Start with (MEM (SYMBOL_REF)), since that's where
14683 it'll probably end up. Add a penalty for size. */
14684 *total = (COSTS_N_INSNS (1)
14685 + (flag_pic != 0 && !TARGET_64BIT)
14686 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14692 /* The zero extensions is often completely free on x86_64, so make
14693 it as cheap as possible. */
14694 if (TARGET_64BIT && mode == DImode
14695 && GET_MODE (XEXP (x, 0)) == SImode)
14697 else if (TARGET_ZERO_EXTEND_WITH_AND)
14698 *total = COSTS_N_INSNS (ix86_cost->add);
14700 *total = COSTS_N_INSNS (ix86_cost->movzx);
14704 *total = COSTS_N_INSNS (ix86_cost->movsx);
14708 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14709 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14711 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14714 *total = COSTS_N_INSNS (ix86_cost->add);
14717 if ((value == 2 || value == 3)
14718 && !TARGET_DECOMPOSE_LEA
14719 && ix86_cost->lea <= ix86_cost->shift_const)
14721 *total = COSTS_N_INSNS (ix86_cost->lea);
14731 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14733 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14735 if (INTVAL (XEXP (x, 1)) > 32)
14736 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14738 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14742 if (GET_CODE (XEXP (x, 1)) == AND)
14743 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14745 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14750 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14751 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14753 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14758 if (FLOAT_MODE_P (mode))
14759 *total = COSTS_N_INSNS (ix86_cost->fmul);
14760 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14762 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14765 for (nbits = 0; value != 0; value >>= 1)
14768 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14769 + nbits * ix86_cost->mult_bit);
14773 /* This is arbitrary */
14774 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14775 + 7 * ix86_cost->mult_bit);
14783 if (FLOAT_MODE_P (mode))
14784 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14786 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14790 if (FLOAT_MODE_P (mode))
14791 *total = COSTS_N_INSNS (ix86_cost->fadd);
14792 else if (!TARGET_DECOMPOSE_LEA
14793 && GET_MODE_CLASS (mode) == MODE_INT
14794 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14796 if (GET_CODE (XEXP (x, 0)) == PLUS
14797 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14798 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14799 && CONSTANT_P (XEXP (x, 1)))
14801 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14802 if (val == 2 || val == 4 || val == 8)
14804 *total = COSTS_N_INSNS (ix86_cost->lea);
14805 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14806 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14808 *total += rtx_cost (XEXP (x, 1), outer_code);
14812 else if (GET_CODE (XEXP (x, 0)) == MULT
14813 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14815 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14816 if (val == 2 || val == 4 || val == 8)
14818 *total = COSTS_N_INSNS (ix86_cost->lea);
14819 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14820 *total += rtx_cost (XEXP (x, 1), outer_code);
14824 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14826 *total = COSTS_N_INSNS (ix86_cost->lea);
14827 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14828 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14829 *total += rtx_cost (XEXP (x, 1), outer_code);
14836 if (FLOAT_MODE_P (mode))
14838 *total = COSTS_N_INSNS (ix86_cost->fadd);
14846 if (!TARGET_64BIT && mode == DImode)
14848 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14849 + (rtx_cost (XEXP (x, 0), outer_code)
14850 << (GET_MODE (XEXP (x, 0)) != DImode))
14851 + (rtx_cost (XEXP (x, 1), outer_code)
14852 << (GET_MODE (XEXP (x, 1)) != DImode)));
14858 if (FLOAT_MODE_P (mode))
14860 *total = COSTS_N_INSNS (ix86_cost->fchs);
14866 if (!TARGET_64BIT && mode == DImode)
14867 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14869 *total = COSTS_N_INSNS (ix86_cost->add);
14873 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14878 if (FLOAT_MODE_P (mode))
14879 *total = COSTS_N_INSNS (ix86_cost->fabs);
14883 if (FLOAT_MODE_P (mode))
14884 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14888 if (XINT (x, 1) == UNSPEC_TP)
14897 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14899 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
14902 fputs ("\tpushl $", asm_out_file);
14903 assemble_name (asm_out_file, XSTR (symbol, 0));
14904 fputc ('\n', asm_out_file);
14910 static int current_machopic_label_num;
14912 /* Given a symbol name and its associated stub, write out the
14913 definition of the stub. */
14916 machopic_output_stub (FILE *file, const char *symb, const char *stub)
14918 unsigned int length;
14919 char *binder_name, *symbol_name, lazy_ptr_name[32];
14920 int label = ++current_machopic_label_num;
14922 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14923 symb = (*targetm.strip_name_encoding) (symb);
14925 length = strlen (stub);
14926 binder_name = alloca (length + 32);
14927 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14929 length = strlen (symb);
14930 symbol_name = alloca (length + 32);
14931 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14933 sprintf (lazy_ptr_name, "L%d$lz", label);
14936 machopic_picsymbol_stub_section ();
14938 machopic_symbol_stub_section ();
14940 fprintf (file, "%s:\n", stub);
14941 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14945 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14946 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14947 fprintf (file, "\tjmp %%edx\n");
14950 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14952 fprintf (file, "%s:\n", binder_name);
14956 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14957 fprintf (file, "\tpushl %%eax\n");
14960 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14962 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14964 machopic_lazy_symbol_ptr_section ();
14965 fprintf (file, "%s:\n", lazy_ptr_name);
14966 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14967 fprintf (file, "\t.long %s\n", binder_name);
14969 #endif /* TARGET_MACHO */
14971 /* Order the registers for register allocator. */
14974 x86_order_regs_for_local_alloc (void)
14979 /* First allocate the local general purpose registers. */
14980 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14981 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14982 reg_alloc_order [pos++] = i;
14984 /* Global general purpose registers. */
14985 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14986 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14987 reg_alloc_order [pos++] = i;
14989 /* x87 registers come first in case we are doing FP math
14991 if (!TARGET_SSE_MATH)
14992 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14993 reg_alloc_order [pos++] = i;
14995 /* SSE registers. */
14996 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14997 reg_alloc_order [pos++] = i;
14998 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14999 reg_alloc_order [pos++] = i;
15001 /* x87 registers. */
15002 if (TARGET_SSE_MATH)
15003 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15004 reg_alloc_order [pos++] = i;
15006 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15007 reg_alloc_order [pos++] = i;
15009 /* Initialize the rest of array as we do not allocate some registers
15011 while (pos < FIRST_PSEUDO_REGISTER)
15012 reg_alloc_order [pos++] = 0;
15015 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15016 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15019 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15020 struct attribute_spec.handler. */
15022 ix86_handle_struct_attribute (tree *node, tree name,
15023 tree args ATTRIBUTE_UNUSED,
15024 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15027 if (DECL_P (*node))
15029 if (TREE_CODE (*node) == TYPE_DECL)
15030 type = &TREE_TYPE (*node);
15035 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15036 || TREE_CODE (*type) == UNION_TYPE)))
15038 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15039 *no_add_attrs = true;
15042 else if ((is_attribute_p ("ms_struct", name)
15043 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15044 || ((is_attribute_p ("gcc_struct", name)
15045 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15047 warning ("`%s' incompatible attribute ignored",
15048 IDENTIFIER_POINTER (name));
15049 *no_add_attrs = true;
15056 ix86_ms_bitfield_layout_p (tree record_type)
15058 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15059 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15060 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15063 /* Returns an expression indicating where the this parameter is
15064 located on entry to the FUNCTION. */
15067 x86_this_parameter (tree function)
15069 tree type = TREE_TYPE (function);
15073 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15074 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15077 if (ix86_fntype_regparm (type) > 0)
15081 parm = TYPE_ARG_TYPES (type);
15082 /* Figure out whether or not the function has a variable number of
15084 for (; parm; parm = TREE_CHAIN (parm))
15085 if (TREE_VALUE (parm) == void_type_node)
15087 /* If not, the this parameter is in %eax. */
15089 return gen_rtx_REG (SImode, 0);
15092 if (aggregate_value_p (TREE_TYPE (type)))
15093 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15095 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15098 /* Determine whether x86_output_mi_thunk can succeed. */
15101 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15102 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15103 HOST_WIDE_INT vcall_offset, tree function)
15105 /* 64-bit can handle anything. */
15109 /* For 32-bit, everything's fine if we have one free register. */
15110 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15113 /* Need a free register for vcall_offset. */
15117 /* Need a free register for GOT references. */
15118 if (flag_pic && !(*targetm.binds_local_p) (function))
15121 /* Otherwise ok. */
15125 /* Output the assembler code for a thunk function. THUNK_DECL is the
15126 declaration for the thunk function itself, FUNCTION is the decl for
15127 the target function. DELTA is an immediate constant offset to be
15128 added to THIS. If VCALL_OFFSET is nonzero, the word at
15129 *(*this + vcall_offset) should be added to THIS. */
15132 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15133 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15134 HOST_WIDE_INT vcall_offset, tree function)
15137 rtx this = x86_this_parameter (function);
15140 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15141 pull it in now and let DELTA benefit. */
15144 else if (vcall_offset)
15146 /* Put the this parameter into %eax. */
15148 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15149 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15152 this_reg = NULL_RTX;
15154 /* Adjust the this parameter by a fixed constant. */
15157 xops[0] = GEN_INT (delta);
15158 xops[1] = this_reg ? this_reg : this;
15161 if (!x86_64_general_operand (xops[0], DImode))
15163 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15165 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15169 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15172 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15175 /* Adjust the this parameter by a value stored in the vtable. */
15179 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15181 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15183 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15186 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15188 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15190 /* Adjust the this parameter. */
15191 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15192 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15194 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15195 xops[0] = GEN_INT (vcall_offset);
15197 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15198 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15200 xops[1] = this_reg;
15202 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15204 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15207 /* If necessary, drop THIS back to its stack slot. */
15208 if (this_reg && this_reg != this)
15210 xops[0] = this_reg;
15212 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15215 xops[0] = XEXP (DECL_RTL (function), 0);
15218 if (!flag_pic || (*targetm.binds_local_p) (function))
15219 output_asm_insn ("jmp\t%P0", xops);
15222 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15223 tmp = gen_rtx_CONST (Pmode, tmp);
15224 tmp = gen_rtx_MEM (QImode, tmp);
15226 output_asm_insn ("jmp\t%A0", xops);
15231 if (!flag_pic || (*targetm.binds_local_p) (function))
15232 output_asm_insn ("jmp\t%P0", xops);
15237 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15238 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15239 tmp = gen_rtx_MEM (QImode, tmp);
15241 output_asm_insn ("jmp\t%0", xops);
15244 #endif /* TARGET_MACHO */
15246 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15247 output_set_got (tmp);
15250 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15251 output_asm_insn ("jmp\t{*}%1", xops);
15257 x86_file_start (void)
15259 default_file_start ();
15260 if (X86_FILE_START_VERSION_DIRECTIVE)
15261 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15262 if (X86_FILE_START_FLTUSED)
15263 fputs ("\t.global\t__fltused\n", asm_out_file);
15264 if (ix86_asm_dialect == ASM_INTEL)
15265 fputs ("\t.intel_syntax\n", asm_out_file);
15269 x86_field_alignment (tree field, int computed)
15271 enum machine_mode mode;
15272 tree type = TREE_TYPE (field);
15274 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15276 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15277 ? get_inner_array_type (type) : type);
15278 if (mode == DFmode || mode == DCmode
15279 || GET_MODE_CLASS (mode) == MODE_INT
15280 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15281 return MIN (32, computed);
15285 /* Output assembler code to FILE to increment profiler label # LABELNO
15286 for profiling a function entry. */
15288 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15293 #ifndef NO_PROFILE_COUNTERS
15294 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15296 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15300 #ifndef NO_PROFILE_COUNTERS
15301 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15303 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15307 #ifndef NO_PROFILE_COUNTERS
15308 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15309 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15311 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15315 #ifndef NO_PROFILE_COUNTERS
15316 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15317 PROFILE_COUNT_REGISTER);
15319 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15323 /* We don't have exact information about the insn sizes, but we may assume
15324 quite safely that we are informed about all 1 byte insns and memory
15325 address sizes. This is enough to eliminate unnecessary padding in
15329 min_insn_size (rtx insn)
15333 if (!INSN_P (insn) || !active_insn_p (insn))
15336 /* Discard alignments we've emit and jump instructions. */
15337 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15338 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15340 if (GET_CODE (insn) == JUMP_INSN
15341 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15342 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15345 /* Important case - calls are always 5 bytes.
15346 It is common to have many calls in the row. */
15347 if (GET_CODE (insn) == CALL_INSN
15348 && symbolic_reference_mentioned_p (PATTERN (insn))
15349 && !SIBLING_CALL_P (insn))
15351 if (get_attr_length (insn) <= 1)
15354 /* For normal instructions we may rely on the sizes of addresses
15355 and the presence of symbol to require 4 bytes of encoding.
15356 This is not the case for jumps where references are PC relative. */
15357 if (GET_CODE (insn) != JUMP_INSN)
15359 l = get_attr_length_address (insn);
15360 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15369 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15373 k8_avoid_jump_misspredicts (void)
15375 rtx insn, start = get_insns ();
15376 int nbytes = 0, njumps = 0;
15379 /* Look for all minimal intervals of instructions containing 4 jumps.
15380 The intervals are bounded by START and INSN. NBYTES is the total
15381 size of instructions in the interval including INSN and not including
15382 START. When the NBYTES is smaller than 16 bytes, it is possible
15383 that the end of START and INSN ends up in the same 16byte page.
15385 The smallest offset in the page INSN can start is the case where START
15386 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15387 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15389 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15392 nbytes += min_insn_size (insn);
15394 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15395 INSN_UID (insn), min_insn_size (insn));
15396 if ((GET_CODE (insn) == JUMP_INSN
15397 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15398 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15399 || GET_CODE (insn) == CALL_INSN)
15406 start = NEXT_INSN (start);
15407 if ((GET_CODE (start) == JUMP_INSN
15408 && GET_CODE (PATTERN (start)) != ADDR_VEC
15409 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15410 || GET_CODE (start) == CALL_INSN)
15411 njumps--, isjump = 1;
15414 nbytes -= min_insn_size (start);
15419 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15420 INSN_UID (start), INSN_UID (insn), nbytes);
15422 if (njumps == 3 && isjump && nbytes < 16)
15424 int padsize = 15 - nbytes + min_insn_size (insn);
15427 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15428 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15433 /* Implement machine specific optimizations.
15434 At the moment we implement single transformation: AMD Athlon works faster
15435 when RET is not destination of conditional jump or directly preceded
15436 by other jump instruction. We avoid the penalty by inserting NOP just
15437 before the RET instructions in such cases. */
15443 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15445 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15447 basic_block bb = e->src;
15450 bool replace = false;
15452 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15453 || !maybe_hot_bb_p (bb))
15455 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15456 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15458 if (prev && GET_CODE (prev) == CODE_LABEL)
15461 for (e = bb->pred; e; e = e->pred_next)
15462 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15463 && !(e->flags & EDGE_FALLTHRU))
15468 prev = prev_active_insn (ret);
15470 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15471 || GET_CODE (prev) == CALL_INSN))
15473 /* Empty functions get branch mispredict even when the jump destination
15474 is not visible to us. */
15475 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15480 emit_insn_before (gen_return_internal_long (), ret);
15484 k8_avoid_jump_misspredicts ();
15487 /* Return nonzero when QImode register that must be represented via REX prefix
15490 x86_extended_QIreg_mentioned_p (rtx insn)
15493 extract_insn_cached (insn);
15494 for (i = 0; i < recog_data.n_operands; i++)
15495 if (REG_P (recog_data.operand[i])
15496 && REGNO (recog_data.operand[i]) >= 4)
15501 /* Return nonzero when P points to register encoded via REX prefix.
15502 Called via for_each_rtx. */
15504 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15506 unsigned int regno;
15509 regno = REGNO (*p);
15510 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15513 /* Return true when INSN mentions register that must be encoded using REX
15516 x86_extended_reg_mentioned_p (rtx insn)
15518 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15521 /* Generate an unsigned DImode to FP conversion. This is the same code
15522 optabs would emit if we didn't have TFmode patterns. */
15525 x86_emit_floatuns (rtx operands[2])
15527 rtx neglab, donelab, i0, i1, f0, in, out;
15528 enum machine_mode mode;
15531 in = force_reg (DImode, operands[1]);
15532 mode = GET_MODE (out);
15533 neglab = gen_label_rtx ();
15534 donelab = gen_label_rtx ();
15535 i1 = gen_reg_rtx (Pmode);
15536 f0 = gen_reg_rtx (mode);
15538 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15540 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15541 emit_jump_insn (gen_jump (donelab));
15544 emit_label (neglab);
15546 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15547 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15548 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15549 expand_float (f0, i0, 0);
15550 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15552 emit_label (donelab);
15555 /* Return if we do not know how to pass TYPE solely in registers. */
15557 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15559 if (default_must_pass_in_stack (mode, type))
15561 return (!TARGET_64BIT && type && mode == TImode);
15564 #include "gt-i386.h"