1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #ifndef CHECK_STACK_LIMIT
50 #define CHECK_STACK_LIMIT (-1)
53 /* Return index of given mode in mult and division cost tables. */
54 #define MODE_INDEX(mode) \
55 ((mode) == QImode ? 0 \
56 : (mode) == HImode ? 1 \
57 : (mode) == SImode ? 2 \
58 : (mode) == DImode ? 3 \
61 /* Processor costs (relative to an add) */
63 struct processor_costs size_cost = { /* costs for tuning for size */
64 2, /* cost of an add instruction */
65 3, /* cost of a lea instruction */
66 2, /* variable shift costs */
67 3, /* constant shift costs */
68 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
69 0, /* cost of multiply per each bit set */
70 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
71 3, /* cost of movsx */
72 3, /* cost of movzx */
75 2, /* cost for loading QImode using movzbl */
76 {2, 2, 2}, /* cost of loading integer registers
77 in QImode, HImode and SImode.
78 Relative to reg-reg move (2). */
79 {2, 2, 2}, /* cost of storing integer registers */
80 2, /* cost of reg,reg fld/fst */
81 {2, 2, 2}, /* cost of loading fp registers
82 in SFmode, DFmode and XFmode */
83 {2, 2, 2}, /* cost of storing fp registers */
84 3, /* cost of moving MMX register */
85 {3, 3}, /* cost of loading MMX registers
86 in SImode and DImode */
87 {3, 3}, /* cost of storing MMX registers
88 in SImode and DImode */
89 3, /* cost of moving SSE register */
90 {3, 3, 3}, /* cost of loading SSE registers
91 in SImode, DImode and TImode */
92 {3, 3, 3}, /* cost of storing SSE registers
93 in SImode, DImode and TImode */
94 3, /* MMX or SSE register to integer */
95 0, /* size of prefetch block */
96 0, /* number of parallel prefetches */
98 2, /* cost of FADD and FSUB insns. */
99 2, /* cost of FMUL instruction. */
100 2, /* cost of FDIV instruction. */
101 2, /* cost of FABS instruction. */
102 2, /* cost of FCHS instruction. */
103 2, /* cost of FSQRT instruction. */
106 /* Processor costs (relative to an add) */
108 struct processor_costs i386_cost = { /* 386 specific costs */
109 1, /* cost of an add instruction */
110 1, /* cost of a lea instruction */
111 3, /* variable shift costs */
112 2, /* constant shift costs */
113 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
114 1, /* cost of multiply per each bit set */
115 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
116 3, /* cost of movsx */
117 2, /* cost of movzx */
118 15, /* "large" insn */
120 4, /* cost for loading QImode using movzbl */
121 {2, 4, 2}, /* cost of loading integer registers
122 in QImode, HImode and SImode.
123 Relative to reg-reg move (2). */
124 {2, 4, 2}, /* cost of storing integer registers */
125 2, /* cost of reg,reg fld/fst */
126 {8, 8, 8}, /* cost of loading fp registers
127 in SFmode, DFmode and XFmode */
128 {8, 8, 8}, /* cost of storing fp registers */
129 2, /* cost of moving MMX register */
130 {4, 8}, /* cost of loading MMX registers
131 in SImode and DImode */
132 {4, 8}, /* cost of storing MMX registers
133 in SImode and DImode */
134 2, /* cost of moving SSE register */
135 {4, 8, 16}, /* cost of loading SSE registers
136 in SImode, DImode and TImode */
137 {4, 8, 16}, /* cost of storing SSE registers
138 in SImode, DImode and TImode */
139 3, /* MMX or SSE register to integer */
140 0, /* size of prefetch block */
141 0, /* number of parallel prefetches */
143 23, /* cost of FADD and FSUB insns. */
144 27, /* cost of FMUL instruction. */
145 88, /* cost of FDIV instruction. */
146 22, /* cost of FABS instruction. */
147 24, /* cost of FCHS instruction. */
148 122, /* cost of FSQRT instruction. */
152 struct processor_costs i486_cost = { /* 486 specific costs */
153 1, /* cost of an add instruction */
154 1, /* cost of a lea instruction */
155 3, /* variable shift costs */
156 2, /* constant shift costs */
157 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
158 1, /* cost of multiply per each bit set */
159 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
160 3, /* cost of movsx */
161 2, /* cost of movzx */
162 15, /* "large" insn */
164 4, /* cost for loading QImode using movzbl */
165 {2, 4, 2}, /* cost of loading integer registers
166 in QImode, HImode and SImode.
167 Relative to reg-reg move (2). */
168 {2, 4, 2}, /* cost of storing integer registers */
169 2, /* cost of reg,reg fld/fst */
170 {8, 8, 8}, /* cost of loading fp registers
171 in SFmode, DFmode and XFmode */
172 {8, 8, 8}, /* cost of storing fp registers */
173 2, /* cost of moving MMX register */
174 {4, 8}, /* cost of loading MMX registers
175 in SImode and DImode */
176 {4, 8}, /* cost of storing MMX registers
177 in SImode and DImode */
178 2, /* cost of moving SSE register */
179 {4, 8, 16}, /* cost of loading SSE registers
180 in SImode, DImode and TImode */
181 {4, 8, 16}, /* cost of storing SSE registers
182 in SImode, DImode and TImode */
183 3, /* MMX or SSE register to integer */
184 0, /* size of prefetch block */
185 0, /* number of parallel prefetches */
187 8, /* cost of FADD and FSUB insns. */
188 16, /* cost of FMUL instruction. */
189 73, /* cost of FDIV instruction. */
190 3, /* cost of FABS instruction. */
191 3, /* cost of FCHS instruction. */
192 83, /* cost of FSQRT instruction. */
196 struct processor_costs pentium_cost = {
197 1, /* cost of an add instruction */
198 1, /* cost of a lea instruction */
199 4, /* variable shift costs */
200 1, /* constant shift costs */
201 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
202 0, /* cost of multiply per each bit set */
203 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
204 3, /* cost of movsx */
205 2, /* cost of movzx */
206 8, /* "large" insn */
208 6, /* cost for loading QImode using movzbl */
209 {2, 4, 2}, /* cost of loading integer registers
210 in QImode, HImode and SImode.
211 Relative to reg-reg move (2). */
212 {2, 4, 2}, /* cost of storing integer registers */
213 2, /* cost of reg,reg fld/fst */
214 {2, 2, 6}, /* cost of loading fp registers
215 in SFmode, DFmode and XFmode */
216 {4, 4, 6}, /* cost of storing fp registers */
217 8, /* cost of moving MMX register */
218 {8, 8}, /* cost of loading MMX registers
219 in SImode and DImode */
220 {8, 8}, /* cost of storing MMX registers
221 in SImode and DImode */
222 2, /* cost of moving SSE register */
223 {4, 8, 16}, /* cost of loading SSE registers
224 in SImode, DImode and TImode */
225 {4, 8, 16}, /* cost of storing SSE registers
226 in SImode, DImode and TImode */
227 3, /* MMX or SSE register to integer */
228 0, /* size of prefetch block */
229 0, /* number of parallel prefetches */
231 3, /* cost of FADD and FSUB insns. */
232 3, /* cost of FMUL instruction. */
233 39, /* cost of FDIV instruction. */
234 1, /* cost of FABS instruction. */
235 1, /* cost of FCHS instruction. */
236 70, /* cost of FSQRT instruction. */
240 struct processor_costs pentiumpro_cost = {
241 1, /* cost of an add instruction */
242 1, /* cost of a lea instruction */
243 1, /* variable shift costs */
244 1, /* constant shift costs */
245 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
246 0, /* cost of multiply per each bit set */
247 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
248 1, /* cost of movsx */
249 1, /* cost of movzx */
250 8, /* "large" insn */
252 2, /* cost for loading QImode using movzbl */
253 {4, 4, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 2, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of storing fp registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {2, 2, 8}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 32, /* size of prefetch block */
273 6, /* number of parallel prefetches */
275 3, /* cost of FADD and FSUB insns. */
276 5, /* cost of FMUL instruction. */
277 56, /* cost of FDIV instruction. */
278 2, /* cost of FABS instruction. */
279 2, /* cost of FCHS instruction. */
280 56, /* cost of FSQRT instruction. */
284 struct processor_costs k6_cost = {
285 1, /* cost of an add instruction */
286 2, /* cost of a lea instruction */
287 1, /* variable shift costs */
288 1, /* constant shift costs */
289 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
290 0, /* cost of multiply per each bit set */
291 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
292 2, /* cost of movsx */
293 2, /* cost of movzx */
294 8, /* "large" insn */
296 3, /* cost for loading QImode using movzbl */
297 {4, 5, 4}, /* cost of loading integer registers
298 in QImode, HImode and SImode.
299 Relative to reg-reg move (2). */
300 {2, 3, 2}, /* cost of storing integer registers */
301 4, /* cost of reg,reg fld/fst */
302 {6, 6, 6}, /* cost of loading fp registers
303 in SFmode, DFmode and XFmode */
304 {4, 4, 4}, /* cost of storing fp registers */
305 2, /* cost of moving MMX register */
306 {2, 2}, /* cost of loading MMX registers
307 in SImode and DImode */
308 {2, 2}, /* cost of storing MMX registers
309 in SImode and DImode */
310 2, /* cost of moving SSE register */
311 {2, 2, 8}, /* cost of loading SSE registers
312 in SImode, DImode and TImode */
313 {2, 2, 8}, /* cost of storing SSE registers
314 in SImode, DImode and TImode */
315 6, /* MMX or SSE register to integer */
316 32, /* size of prefetch block */
317 1, /* number of parallel prefetches */
319 2, /* cost of FADD and FSUB insns. */
320 2, /* cost of FMUL instruction. */
321 56, /* cost of FDIV instruction. */
322 2, /* cost of FABS instruction. */
323 2, /* cost of FCHS instruction. */
324 56, /* cost of FSQRT instruction. */
328 struct processor_costs athlon_cost = {
329 1, /* cost of an add instruction */
330 2, /* cost of a lea instruction */
331 1, /* variable shift costs */
332 1, /* constant shift costs */
333 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
334 0, /* cost of multiply per each bit set */
335 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
336 1, /* cost of movsx */
337 1, /* cost of movzx */
338 8, /* "large" insn */
340 4, /* cost for loading QImode using movzbl */
341 {3, 4, 3}, /* cost of loading integer registers
342 in QImode, HImode and SImode.
343 Relative to reg-reg move (2). */
344 {3, 4, 3}, /* cost of storing integer registers */
345 4, /* cost of reg,reg fld/fst */
346 {4, 4, 12}, /* cost of loading fp registers
347 in SFmode, DFmode and XFmode */
348 {6, 6, 8}, /* cost of storing fp registers */
349 2, /* cost of moving MMX register */
350 {4, 4}, /* cost of loading MMX registers
351 in SImode and DImode */
352 {4, 4}, /* cost of storing MMX registers
353 in SImode and DImode */
354 2, /* cost of moving SSE register */
355 {4, 4, 6}, /* cost of loading SSE registers
356 in SImode, DImode and TImode */
357 {4, 4, 5}, /* cost of storing SSE registers
358 in SImode, DImode and TImode */
359 5, /* MMX or SSE register to integer */
360 64, /* size of prefetch block */
361 6, /* number of parallel prefetches */
363 4, /* cost of FADD and FSUB insns. */
364 4, /* cost of FMUL instruction. */
365 24, /* cost of FDIV instruction. */
366 2, /* cost of FABS instruction. */
367 2, /* cost of FCHS instruction. */
368 35, /* cost of FSQRT instruction. */
372 struct processor_costs k8_cost = {
373 1, /* cost of an add instruction */
374 2, /* cost of a lea instruction */
375 1, /* variable shift costs */
376 1, /* constant shift costs */
377 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
378 0, /* cost of multiply per each bit set */
379 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
380 1, /* cost of movsx */
381 1, /* cost of movzx */
382 8, /* "large" insn */
384 4, /* cost for loading QImode using movzbl */
385 {3, 4, 3}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {3, 4, 3}, /* cost of storing integer registers */
389 4, /* cost of reg,reg fld/fst */
390 {4, 4, 12}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {6, 6, 8}, /* cost of storing fp registers */
393 2, /* cost of moving MMX register */
394 {3, 3}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {4, 4}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {4, 3, 6}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {4, 4, 5}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 5, /* MMX or SSE register to integer */
404 64, /* size of prefetch block */
405 6, /* number of parallel prefetches */
407 4, /* cost of FADD and FSUB insns. */
408 4, /* cost of FMUL instruction. */
409 19, /* cost of FDIV instruction. */
410 2, /* cost of FABS instruction. */
411 2, /* cost of FCHS instruction. */
412 35, /* cost of FSQRT instruction. */
416 struct processor_costs pentium4_cost = {
417 1, /* cost of an add instruction */
418 1, /* cost of a lea instruction */
419 4, /* variable shift costs */
420 4, /* constant shift costs */
421 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
422 0, /* cost of multiply per each bit set */
423 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
424 1, /* cost of movsx */
425 1, /* cost of movzx */
426 16, /* "large" insn */
428 2, /* cost for loading QImode using movzbl */
429 {4, 5, 4}, /* cost of loading integer registers
430 in QImode, HImode and SImode.
431 Relative to reg-reg move (2). */
432 {2, 3, 2}, /* cost of storing integer registers */
433 2, /* cost of reg,reg fld/fst */
434 {2, 2, 6}, /* cost of loading fp registers
435 in SFmode, DFmode and XFmode */
436 {4, 4, 6}, /* cost of storing fp registers */
437 2, /* cost of moving MMX register */
438 {2, 2}, /* cost of loading MMX registers
439 in SImode and DImode */
440 {2, 2}, /* cost of storing MMX registers
441 in SImode and DImode */
442 12, /* cost of moving SSE register */
443 {12, 12, 12}, /* cost of loading SSE registers
444 in SImode, DImode and TImode */
445 {2, 2, 8}, /* cost of storing SSE registers
446 in SImode, DImode and TImode */
447 10, /* MMX or SSE register to integer */
448 64, /* size of prefetch block */
449 6, /* number of parallel prefetches */
451 5, /* cost of FADD and FSUB insns. */
452 7, /* cost of FMUL instruction. */
453 43, /* cost of FDIV instruction. */
454 2, /* cost of FABS instruction. */
455 2, /* cost of FCHS instruction. */
456 43, /* cost of FSQRT instruction. */
459 const struct processor_costs *ix86_cost = &pentium_cost;
461 /* Processor feature/optimization bitmasks. */
462 #define m_386 (1<<PROCESSOR_I386)
463 #define m_486 (1<<PROCESSOR_I486)
464 #define m_PENT (1<<PROCESSOR_PENTIUM)
465 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
466 #define m_K6 (1<<PROCESSOR_K6)
467 #define m_ATHLON (1<<PROCESSOR_ATHLON)
468 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
469 #define m_K8 (1<<PROCESSOR_K8)
470 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
472 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
473 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
474 const int x86_zero_extend_with_and = m_486 | m_PENT;
475 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
476 const int x86_double_with_add = ~m_386;
477 const int x86_use_bit_test = m_386;
478 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
479 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
480 const int x86_3dnow_a = m_ATHLON_K8;
481 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
482 const int x86_branch_hints = m_PENT4;
483 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
484 const int x86_partial_reg_stall = m_PPRO;
485 const int x86_use_loop = m_K6;
486 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
487 const int x86_use_mov0 = m_K6;
488 const int x86_use_cltd = ~(m_PENT | m_K6);
489 const int x86_read_modify_write = ~m_PENT;
490 const int x86_read_modify = ~(m_PENT | m_PPRO);
491 const int x86_split_long_moves = m_PPRO;
492 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
493 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
494 const int x86_single_stringop = m_386 | m_PENT4;
495 const int x86_qimode_math = ~(0);
496 const int x86_promote_qi_regs = 0;
497 const int x86_himode_math = ~(m_PPRO);
498 const int x86_promote_hi_regs = m_PPRO;
499 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
500 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
501 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
502 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
503 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
504 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
505 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
506 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
507 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_decompose_lea = m_PENT4;
510 const int x86_shift1 = ~m_486;
511 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
512 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
513 /* Set for machines where the type and dependencies are resolved on SSE register
514 parts instead of whole registers, so we may maintain just lower part of
515 scalar values in proper format leaving the upper part undefined. */
516 const int x86_sse_partial_regs = m_ATHLON_K8;
517 /* Athlon optimizes partial-register FPS special case, thus avoiding the
518 need for extra instructions beforehand */
519 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
520 const int x86_sse_typeless_stores = m_ATHLON_K8;
521 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
522 const int x86_use_ffreep = m_ATHLON_K8;
523 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
524 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
525 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
527 /* In case the average insn count for single function invocation is
528 lower than this constant, emit fast (but longer) prologue and
530 #define FAST_PROLOGUE_INSN_COUNT 20
532 /* Set by prologue expander and used by epilogue expander to determine
534 static int use_fast_prologue_epilogue;
536 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
537 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
538 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
539 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
541 /* Array of the smallest class containing reg number REGNO, indexed by
542 REGNO. Used by REGNO_REG_CLASS in i386.h. */
544 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
547 AREG, DREG, CREG, BREG,
549 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
551 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
552 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
555 /* flags, fpsr, dirflag, frame */
556 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
557 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
559 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
561 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
562 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
563 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
567 /* The "default" register map used in 32bit mode. */
569 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
571 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
572 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
573 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
574 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
575 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
576 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
577 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
580 static int const x86_64_int_parameter_registers[6] =
582 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
583 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
586 static int const x86_64_int_return_registers[4] =
588 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
591 /* The "default" register map used in 64bit mode. */
592 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
594 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
595 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
596 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
597 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
598 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
599 8,9,10,11,12,13,14,15, /* extended integer registers */
600 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
603 /* Define the register numbers to be used in Dwarf debugging information.
604 The SVR4 reference port C compiler uses the following register numbers
605 in its Dwarf output code:
606 0 for %eax (gcc regno = 0)
607 1 for %ecx (gcc regno = 2)
608 2 for %edx (gcc regno = 1)
609 3 for %ebx (gcc regno = 3)
610 4 for %esp (gcc regno = 7)
611 5 for %ebp (gcc regno = 6)
612 6 for %esi (gcc regno = 4)
613 7 for %edi (gcc regno = 5)
614 The following three DWARF register numbers are never generated by
615 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
616 believes these numbers have these meanings.
617 8 for %eip (no gcc equivalent)
618 9 for %eflags (gcc regno = 17)
619 10 for %trapno (no gcc equivalent)
620 It is not at all clear how we should number the FP stack registers
621 for the x86 architecture. If the version of SDB on x86/svr4 were
622 a bit less brain dead with respect to floating-point then we would
623 have a precedent to follow with respect to DWARF register numbers
624 for x86 FP registers, but the SDB on x86/svr4 is so completely
625 broken with respect to FP registers that it is hardly worth thinking
626 of it as something to strive for compatibility with.
627 The version of x86/svr4 SDB I have at the moment does (partially)
628 seem to believe that DWARF register number 11 is associated with
629 the x86 register %st(0), but that's about all. Higher DWARF
630 register numbers don't seem to be associated with anything in
631 particular, and even for DWARF regno 11, SDB only seems to under-
632 stand that it should say that a variable lives in %st(0) (when
633 asked via an `=' command) if we said it was in DWARF regno 11,
634 but SDB still prints garbage when asked for the value of the
635 variable in question (via a `/' command).
636 (Also note that the labels SDB prints for various FP stack regs
637 when doing an `x' command are all wrong.)
638 Note that these problems generally don't affect the native SVR4
639 C compiler because it doesn't allow the use of -O with -g and
640 because when it is *not* optimizing, it allocates a memory
641 location for each floating-point variable, and the memory
642 location is what gets described in the DWARF AT_location
643 attribute for the variable in question.
644 Regardless of the severe mental illness of the x86/svr4 SDB, we
645 do something sensible here and we use the following DWARF
646 register numbers. Note that these are all stack-top-relative
648 11 for %st(0) (gcc regno = 8)
649 12 for %st(1) (gcc regno = 9)
650 13 for %st(2) (gcc regno = 10)
651 14 for %st(3) (gcc regno = 11)
652 15 for %st(4) (gcc regno = 12)
653 16 for %st(5) (gcc regno = 13)
654 17 for %st(6) (gcc regno = 14)
655 18 for %st(7) (gcc regno = 15)
657 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
659 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
660 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
661 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
662 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
663 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
664 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
665 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
668 /* Test and compare insns in i386.md store the information needed to
669 generate branch and scc insns here. */
671 rtx ix86_compare_op0 = NULL_RTX;
672 rtx ix86_compare_op1 = NULL_RTX;
674 /* The encoding characters for the four TLS models present in ELF. */
676 static char const tls_model_chars[] = " GLil";
678 #define MAX_386_STACK_LOCALS 3
679 /* Size of the register save area. */
680 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
682 /* Define the structure for the machine field in struct function. */
684 struct stack_local_entry GTY(())
689 struct stack_local_entry *next;
693 struct machine_function GTY(())
695 struct stack_local_entry *stack_locals;
696 const char *some_ld_name;
697 int save_varrargs_registers;
698 int accesses_prev_frame;
701 #define ix86_stack_locals (cfun->machine->stack_locals)
702 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
704 /* Structure describing stack frame layout.
705 Stack grows downward:
711 saved frame pointer if frame_pointer_needed
712 <- HARD_FRAME_POINTER
718 > to_allocate <- FRAME_POINTER
730 int outgoing_arguments_size;
733 HOST_WIDE_INT to_allocate;
734 /* The offsets relative to ARG_POINTER. */
735 HOST_WIDE_INT frame_pointer_offset;
736 HOST_WIDE_INT hard_frame_pointer_offset;
737 HOST_WIDE_INT stack_pointer_offset;
740 /* Used to enable/disable debugging features. */
741 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
742 /* Code model option as passed by user. */
743 const char *ix86_cmodel_string;
745 enum cmodel ix86_cmodel;
747 const char *ix86_asm_string;
748 enum asm_dialect ix86_asm_dialect = ASM_ATT;
750 const char *ix86_tls_dialect_string;
751 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
753 /* Which unit we are generating floating point math for. */
754 enum fpmath_unit ix86_fpmath;
756 /* Which cpu are we scheduling for. */
757 enum processor_type ix86_cpu;
758 /* Which instruction set architecture to use. */
759 enum processor_type ix86_arch;
761 /* Strings to hold which cpu and instruction set architecture to use. */
762 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
763 const char *ix86_arch_string; /* for -march=<xxx> */
764 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
766 /* # of registers to use to pass arguments. */
767 const char *ix86_regparm_string;
769 /* true if sse prefetch instruction is not NOOP. */
770 int x86_prefetch_sse;
772 /* ix86_regparm_string as a number */
775 /* Alignment to use for loops and jumps: */
777 /* Power of two alignment for loops. */
778 const char *ix86_align_loops_string;
780 /* Power of two alignment for non-loop jumps. */
781 const char *ix86_align_jumps_string;
783 /* Power of two alignment for stack boundary in bytes. */
784 const char *ix86_preferred_stack_boundary_string;
786 /* Preferred alignment for stack boundary in bits. */
787 int ix86_preferred_stack_boundary;
789 /* Values 1-5: see jump.c */
790 int ix86_branch_cost;
791 const char *ix86_branch_cost_string;
793 /* Power of two alignment for functions. */
794 const char *ix86_align_funcs_string;
796 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
797 static char internal_label_prefix[16];
798 static int internal_label_prefix_len;
800 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
801 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
802 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
803 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
805 static const char *get_some_local_dynamic_name PARAMS ((void));
806 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
807 static rtx maybe_get_pool_constant PARAMS ((rtx));
808 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
809 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
811 static rtx get_thread_pointer PARAMS ((void));
812 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
813 static rtx gen_push PARAMS ((rtx));
814 static int memory_address_length PARAMS ((rtx addr));
815 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
816 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
817 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
818 static void ix86_dump_ppro_packet PARAMS ((FILE *));
819 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
820 static struct machine_function * ix86_init_machine_status PARAMS ((void));
821 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
822 static int ix86_nsaved_regs PARAMS ((void));
823 static void ix86_emit_save_regs PARAMS ((void));
824 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
825 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
826 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
827 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
828 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
829 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
830 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
831 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
832 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
833 static int ix86_issue_rate PARAMS ((void));
834 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
835 static void ix86_sched_init PARAMS ((FILE *, int, int));
836 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
837 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
838 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
839 static int ia32_multipass_dfa_lookahead PARAMS ((void));
840 static void ix86_init_mmx_sse_builtins PARAMS ((void));
841 static rtx x86_this_parameter PARAMS ((tree));
842 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
843 HOST_WIDE_INT, tree));
844 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
845 HOST_WIDE_INT, tree));
846 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
850 rtx base, index, disp;
854 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
855 static int ix86_address_cost PARAMS ((rtx));
856 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
857 static rtx ix86_delegitimize_address PARAMS ((rtx));
859 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
860 static const char *ix86_strip_name_encoding PARAMS ((const char *))
863 struct builtin_description;
864 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
866 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
868 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
869 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
870 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
871 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
872 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
873 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
874 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
878 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
880 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
881 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
882 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
883 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
884 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
885 static int ix86_save_reg PARAMS ((unsigned int, int));
886 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
887 static int ix86_comp_type_attributes PARAMS ((tree, tree));
888 static int ix86_fntype_regparm PARAMS ((tree));
889 const struct attribute_spec ix86_attribute_table[];
890 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
891 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
892 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
893 static int ix86_value_regno PARAMS ((enum machine_mode));
894 static bool contains_128bit_aligned_vector_p PARAMS ((tree));
895 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
896 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
897 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
898 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
900 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
901 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
904 /* Register class used for passing given 64bit part of the argument.
905 These represent classes as documented by the PS ABI, with the exception
906 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
907 use SF or DFmode move instead of DImode to avoid reformatting penalties.
909 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
910 whenever possible (upper half does contain padding).
912 enum x86_64_reg_class
915 X86_64_INTEGER_CLASS,
916 X86_64_INTEGERSI_CLASS,
925 static const char * const x86_64_reg_class_name[] =
926 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
928 #define MAX_CLASSES 4
929 static int classify_argument PARAMS ((enum machine_mode, tree,
930 enum x86_64_reg_class [MAX_CLASSES],
932 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
934 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
936 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
937 enum x86_64_reg_class));
939 /* Table of constants used by fldpi, fldln2, etc... */
940 static REAL_VALUE_TYPE ext_80387_constants_table [5];
941 static bool ext_80387_constants_init = 0;
942 static void init_ext_80387_constants PARAMS ((void));
944 /* Initialize the GCC target structure. */
/* Each hook below is installed by #undef-ing the default from target-def.h
   and #define-ing the i386-specific implementation; TARGET_INITIALIZER at
   the bottom collects them all into the `targetm' vector. */
945 #undef TARGET_ATTRIBUTE_TABLE
946 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
/* NOTE(review): the matching #endif for this #ifdef is not visible in this
   excerpt -- the text is sampled; confirm against the full file. */
947 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
948 # undef TARGET_MERGE_DECL_ATTRIBUTES
949 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
952 #undef TARGET_COMP_TYPE_ATTRIBUTES
953 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
955 #undef TARGET_INIT_BUILTINS
956 #define TARGET_INIT_BUILTINS ix86_init_builtins
958 #undef TARGET_EXPAND_BUILTIN
959 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
961 #undef TARGET_ASM_FUNCTION_EPILOGUE
962 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
/* x86 assemblers take no parentheses around operands. */
964 #undef TARGET_ASM_OPEN_PAREN
965 #define TARGET_ASM_OPEN_PAREN ""
966 #undef TARGET_ASM_CLOSE_PAREN
967 #define TARGET_ASM_CLOSE_PAREN ""
/* Integer data-emission directives; unaligned forms reuse the aligned
   ones since x86 has no alignment-specific directives. */
969 #undef TARGET_ASM_ALIGNED_HI_OP
970 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
971 #undef TARGET_ASM_ALIGNED_SI_OP
972 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
974 #undef TARGET_ASM_ALIGNED_DI_OP
975 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
978 #undef TARGET_ASM_UNALIGNED_HI_OP
979 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
980 #undef TARGET_ASM_UNALIGNED_SI_OP
981 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
982 #undef TARGET_ASM_UNALIGNED_DI_OP
983 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction-scheduler hooks. */
985 #undef TARGET_SCHED_ADJUST_COST
986 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
987 #undef TARGET_SCHED_ISSUE_RATE
988 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
989 #undef TARGET_SCHED_VARIABLE_ISSUE
990 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
991 #undef TARGET_SCHED_INIT
992 #define TARGET_SCHED_INIT ix86_sched_init
993 #undef TARGET_SCHED_REORDER
994 #define TARGET_SCHED_REORDER ix86_sched_reorder
995 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
996 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
997 ia32_use_dfa_pipeline_interface
998 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
999 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1000 ia32_multipass_dfa_lookahead
1002 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1003 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1006 #undef TARGET_HAVE_TLS
1007 #define TARGET_HAVE_TLS true
1009 #undef TARGET_CANNOT_FORCE_CONST_MEM
1010 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1012 #undef TARGET_DELEGITIMIZE_ADDRESS
1013 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1015 #undef TARGET_MS_BITFIELD_LAYOUT_P
1016 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1018 #undef TARGET_ASM_OUTPUT_MI_THUNK
1019 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1020 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1021 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1023 #undef TARGET_RTX_COSTS
1024 #define TARGET_RTX_COSTS ix86_rtx_costs
1025 #undef TARGET_ADDRESS_COST
1026 #define TARGET_ADDRESS_COST ix86_address_cost
/* The one-and-only target hook vector for this backend. */
1028 struct gcc_target targetm = TARGET_INITIALIZER;
1030 /* Sometimes certain combinations of command options do not make
1031 sense on a particular target machine. You can define a macro
1032 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1033 defined, is executed once just after all the command options have
1036 Don't use this macro to turn on various extra optimizations for
1037 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
/* NOTE(review): the lines below are the interior of override_options ();
   its signature, opening brace, and a number of interior lines are not
   visible in this excerpt (the text is sampled) -- confirm against the
   full file before editing. */
1043 /* Comes from final.c -- no real reason to change it. */
1044 #define MAX_CODE_ALIGN 16
/* Per-CPU tuning table: cost vector, flags to force on/off, and default
   loop/jump/function alignments with their max skip values. */
1048 const struct processor_costs *cost; /* Processor costs */
1049 const int target_enable; /* Target flags to enable. */
1050 const int target_disable; /* Target flags to disable. */
1051 const int align_loop; /* Default alignments. */
1052 const int align_loop_max_skip;
1053 const int align_jump;
1054 const int align_jump_max_skip;
1055 const int align_func;
1057 const processor_target_table[PROCESSOR_max] =
1059 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1060 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1061 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1062 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1063 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1064 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1065 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1066 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1069 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
/* Map of -march=/-mcpu= names to processor type plus the ISA-extension
   flags (PTA_*) each name implies. */
1072 const char *const name; /* processor name or nickname. */
1073 const enum processor_type processor;
1074 const enum pta_flags
1079 PTA_PREFETCH_SSE = 8,
1085 const processor_alias_table[] =
1087 {"i386", PROCESSOR_I386, 0},
1088 {"i486", PROCESSOR_I486, 0},
1089 {"i586", PROCESSOR_PENTIUM, 0},
1090 {"pentium", PROCESSOR_PENTIUM, 0},
1091 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1092 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1093 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1094 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1095 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1096 {"i686", PROCESSOR_PENTIUMPRO, 0},
1097 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1098 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1099 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1100 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1101 PTA_MMX | PTA_PREFETCH_SSE},
1102 {"k6", PROCESSOR_K6, PTA_MMX},
1103 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1104 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1105 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1107 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1108 | PTA_3DNOW | PTA_3DNOW_A},
1109 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1110 | PTA_3DNOW_A | PTA_SSE},
1111 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1112 | PTA_3DNOW_A | PTA_SSE},
1113 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1114 | PTA_3DNOW_A | PTA_SSE},
1115 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1116 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1119 int const pta_size = ARRAY_SIZE (processor_alias_table);
1121 /* By default our XFmode is the 80-bit extended format. If we have
1122 use TFmode instead, it's also the 80-bit format, but with padding. */
1123 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1124 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1126 /* Set the default values for switches whose default depends on TARGET_64BIT
1127 in case they weren't overwritten by command line options. */
/* The value 2 marks "not set on the command line" (see
   optimization_options); replace it with the real per-mode default.
   NOTE(review): the enclosing if (TARGET_64BIT) / else lines are not
   visible in this excerpt. */
1130 if (flag_omit_frame_pointer == 2)
1131 flag_omit_frame_pointer = 1;
1132 if (flag_asynchronous_unwind_tables == 2)
1133 flag_asynchronous_unwind_tables = 1;
1134 if (flag_pcc_struct_return == 2)
1135 flag_pcc_struct_return = 0;
1139 if (flag_omit_frame_pointer == 2)
1140 flag_omit_frame_pointer = 0;
1141 if (flag_asynchronous_unwind_tables == 2)
1142 flag_asynchronous_unwind_tables = 0;
1143 if (flag_pcc_struct_return == 2)
1144 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1147 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1148 SUBTARGET_OVERRIDE_OPTIONS;
/* Pick defaults for -mcpu/-march when not given explicitly: tune for the
   chosen arch, else for the configured default CPU. */
1151 if (!ix86_cpu_string && ix86_arch_string)
1152 ix86_cpu_string = ix86_arch_string;
1153 if (!ix86_cpu_string)
1154 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1155 if (!ix86_arch_string)
1156 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
/* Parse -mcmodel=; most models are rejected under -fPIC. */
1158 if (ix86_cmodel_string != 0)
1160 if (!strcmp (ix86_cmodel_string, "small"))
1161 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1163 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1164 else if (!strcmp (ix86_cmodel_string, "32"))
1165 ix86_cmodel = CM_32;
1166 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1167 ix86_cmodel = CM_KERNEL;
1168 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1169 ix86_cmodel = CM_MEDIUM;
1170 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1171 ix86_cmodel = CM_LARGE;
1173 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1177 ix86_cmodel = CM_32;
1179 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
/* Parse -masm= (AT&T vs Intel assembler dialect). */
1181 if (ix86_asm_string != 0)
1183 if (!strcmp (ix86_asm_string, "intel"))
1184 ix86_asm_dialect = ASM_INTEL;
1185 else if (!strcmp (ix86_asm_string, "att"))
1186 ix86_asm_dialect = ASM_ATT;
1188 error ("bad value (%s) for -masm= switch", ix86_asm_string);
/* Sanity-check the code model against the 32/64-bit mode. */
1190 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1191 error ("code model `%s' not supported in the %s bit mode",
1192 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1193 if (ix86_cmodel == CM_LARGE)
1194 sorry ("code model `large' not supported yet");
1195 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1196 sorry ("%i-bit mode not compiled in",
1197 (target_flags & MASK_64BIT) ? 64 : 32);
/* Resolve -march=: set ix86_arch and turn on the ISA-extension flags the
   CPU implies, unless the user set them explicitly. */
1199 for (i = 0; i < pta_size; i++)
1200 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1202 ix86_arch = processor_alias_table[i].processor;
1203 /* Default cpu tuning to the architecture. */
1204 ix86_cpu = ix86_arch;
1205 if (processor_alias_table[i].flags & PTA_MMX
1206 && !(target_flags_explicit & MASK_MMX))
1207 target_flags |= MASK_MMX;
1208 if (processor_alias_table[i].flags & PTA_3DNOW
1209 && !(target_flags_explicit & MASK_3DNOW))
1210 target_flags |= MASK_3DNOW;
1211 if (processor_alias_table[i].flags & PTA_3DNOW_A
1212 && !(target_flags_explicit & MASK_3DNOW_A))
1213 target_flags |= MASK_3DNOW_A;
1214 if (processor_alias_table[i].flags & PTA_SSE
1215 && !(target_flags_explicit & MASK_SSE))
1216 target_flags |= MASK_SSE;
1217 if (processor_alias_table[i].flags & PTA_SSE2
1218 && !(target_flags_explicit & MASK_SSE2))
1219 target_flags |= MASK_SSE2;
1220 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1221 x86_prefetch_sse = true;
1222 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1223 error ("CPU you selected does not support x86-64 instruction set")
1228 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Resolve -mcpu= (tuning only; does not change the ISA). */
1230 for (i = 0; i < pta_size; i++)
1231 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1233 ix86_cpu = processor_alias_table[i].processor;
1234 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1235 error ("CPU you selected does not support x86-64 instruction set");
1238 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1239 x86_prefetch_sse = true;
1241 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
/* -Os uses the size cost table; otherwise take the tuned CPU's costs and
   forced-on/off flags. */
1244 ix86_cost = &size_cost;
1246 ix86_cost = processor_target_table[ix86_cpu].cost;
1247 target_flags |= processor_target_table[ix86_cpu].target_enable;
1248 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1250 /* Arrange to set up i386_stack_locals for all functions. */
1251 init_machine_status = ix86_init_machine_status;
1253 /* Validate -mregparm= value. */
1254 if (ix86_regparm_string)
1256 i = atoi (ix86_regparm_string);
1257 if (i < 0 || i > REGPARM_MAX)
1258 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1264 ix86_regparm = REGPARM_MAX;
1266 /* If the user has provided any of the -malign-* options,
1267 warn and use that value only if -falign-* is not set.
1268 Remove this code in GCC 3.2 or later. */
1269 if (ix86_align_loops_string)
1271 warning ("-malign-loops is obsolete, use -falign-loops");
1272 if (align_loops == 0)
1274 i = atoi (ix86_align_loops_string);
1275 if (i < 0 || i > MAX_CODE_ALIGN)
1276 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1278 align_loops = 1 << i;
1282 if (ix86_align_jumps_string)
1284 warning ("-malign-jumps is obsolete, use -falign-jumps");
1285 if (align_jumps == 0)
1287 i = atoi (ix86_align_jumps_string);
1288 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): diagnostic says "-malign-loops" but this is the
   -malign-jumps path -- apparent copy-paste error in the message. */
1289 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1291 align_jumps = 1 << i;
1295 if (ix86_align_funcs_string)
1297 warning ("-malign-functions is obsolete, use -falign-functions");
1298 if (align_functions == 0)
1300 i = atoi (ix86_align_funcs_string);
1301 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): diagnostic says "-malign-loops" but this is the
   -malign-functions path -- apparent copy-paste error in the message. */
1302 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1304 align_functions = 1 << i;
1308 /* Default align_* from the processor table. */
1309 if (align_loops == 0)
1311 align_loops = processor_target_table[ix86_cpu].align_loop;
1312 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1314 if (align_jumps == 0)
1316 align_jumps = processor_target_table[ix86_cpu].align_jump;
1317 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1319 if (align_functions == 0)
1321 align_functions = processor_target_table[ix86_cpu].align_func;
1324 /* Validate -mpreferred-stack-boundary= value, or provide default.
1325 The default of 128 bits is for Pentium III's SSE __m128, but we
1326 don't want additional code to keep the stack aligned when
1327 optimizing for code size. */
1328 ix86_preferred_stack_boundary = (optimize_size
1329 ? TARGET_64BIT ? 128 : 32
1331 if (ix86_preferred_stack_boundary_string)
1333 i = atoi (ix86_preferred_stack_boundary_string);
1334 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1335 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1336 TARGET_64BIT ? 4 : 2);
1338 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1341 /* Validate -mbranch-cost= value, or provide default. */
1342 ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
1343 if (ix86_branch_cost_string)
1345 i = atoi (ix86_branch_cost_string);
1347 error ("-mbranch-cost=%d is not between 0 and 5", i);
1349 ix86_branch_cost = i;
/* Parse -mtls-dialect= (GNU vs Sun TLS code sequences). */
1352 if (ix86_tls_dialect_string)
1354 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1355 ix86_tls_dialect = TLS_DIALECT_GNU;
1356 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1357 ix86_tls_dialect = TLS_DIALECT_SUN;
1359 error ("bad value (%s) for -mtls-dialect= switch",
1360 ix86_tls_dialect_string);
1363 /* Keep nonleaf frame pointers. */
1364 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1365 flag_omit_frame_pointer = 1;
1367 /* If we're doing fast math, we don't care about comparison order
1368 wrt NaNs. This lets us use a shorter comparison sequence. */
1369 if (flag_unsafe_math_optimizations)
1370 target_flags &= ~MASK_IEEE_FP;
1372 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1373 since the insns won't need emulation. */
1374 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1375 target_flags &= ~MASK_NO_FANCY_MATH_387;
/* 64-bit-mode constraints and defaults.  NOTE(review): the enclosing
   if (TARGET_64BIT) lines are not visible in this excerpt. */
1379 if (TARGET_ALIGN_DOUBLE)
1380 error ("-malign-double makes no sense in the 64bit mode");
1382 error ("-mrtd calling convention not supported in the 64bit mode");
1383 /* Enable by default the SSE and MMX builtins. */
1384 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1385 ix86_fpmath = FPMATH_SSE;
1388 ix86_fpmath = FPMATH_387;
/* Parse -mfpmath=: choose 387, SSE, or both for scalar FP, falling back
   (with a warning) when the requested unit is disabled. */
1390 if (ix86_fpmath_string != 0)
1392 if (! strcmp (ix86_fpmath_string, "387"))
1393 ix86_fpmath = FPMATH_387;
1394 else if (! strcmp (ix86_fpmath_string, "sse"))
1398 warning ("SSE instruction set disabled, using 387 arithmetics");
1399 ix86_fpmath = FPMATH_387;
1402 ix86_fpmath = FPMATH_SSE;
1404 else if (! strcmp (ix86_fpmath_string, "387,sse")
1405 || ! strcmp (ix86_fpmath_string, "sse,387"))
1409 warning ("SSE instruction set disabled, using 387 arithmetics");
1410 ix86_fpmath = FPMATH_387;
1412 else if (!TARGET_80387)
1414 warning ("387 instruction set disabled, using SSE arithmetics");
1415 ix86_fpmath = FPMATH_SSE;
1418 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1421 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1424 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1428 target_flags |= MASK_MMX;
1429 x86_prefetch_sse = true;
1432 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1435 target_flags |= MASK_MMX;
1436 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1437 extensions it adds. */
1438 if (x86_3dnow_a & (1 << ix86_arch))
1439 target_flags |= MASK_3DNOW_A;
1441 if ((x86_accumulate_outgoing_args & CPUMASK)
1442 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1444 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1446 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1449 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1450 p = strchr (internal_label_prefix, 'X');
1451 internal_label_prefix_len = p - internal_label_prefix;
/* Per-optimization-level option tweaks, run before override_options.
   NOTE(review): the return-type line, parameter declaration for LEVEL and
   braces are not visible in this excerpt. */
1457 optimization_options (level, size)
1459 int size ATTRIBUTE_UNUSED;
1461 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1462 make the problem with not enough registers even worse. */
1463 #ifdef INSN_SCHEDULING
1465 flag_schedule_insns = 0;
1468 /* The default values of these switches depend on the TARGET_64BIT
1469 that is not known at this moment. Mark these values with 2 and
1470 let user the to override these. In case there is no command line option
1471 specifying them, we will set the defaults in override_options. */
1473 flag_omit_frame_pointer = 2;
1474 flag_pcc_struct_return = 2;
1475 flag_asynchronous_unwind_tables = 2;
1478 /* Table of valid machine attributes. */
/* Each row: name, min/max args, and whether the attribute requires a
   decl / type / function type, plus the handler that validates it. */
1479 const struct attribute_spec ix86_attribute_table[] =
1481 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1482 /* Stdcall attribute says callee is responsible for popping arguments
1483 if they are not variable. */
1484 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1485 /* Fastcall attribute says callee is responsible for popping arguments
1486 if they are not variable. */
1487 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1488 /* Cdecl attribute says the callee is a normal C declaration */
1489 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1490 /* Regparm attribute specifies how many integer arguments are to be
1491 passed in registers. */
1492 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1493 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1494 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1495 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1496 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1498 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1499 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
/* Sentinel terminating the table. */
1500 { NULL, 0, 0, false, false, false, NULL }
1503 /* Decide whether we can make a sibling call to a function. DECL is the
1504 declaration of the function being targeted by the call and EXP is the
1505 CALL_EXPR representing the call. */
/* NOTE(review): the return type, parameter declarations, braces and the
   early `return false;' lines are not visible in this excerpt. */
1508 ix86_function_ok_for_sibcall (decl, exp)
1512 /* If we are generating position-independent code, we cannot sibcall
1513 optimize any indirect call, or a direct call to a global function,
1514 as the PLT requires %ebx be live. */
1515 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1518 /* If we are returning floats on the 80387 register stack, we cannot
1519 make a sibcall from a function that doesn't return a float to a
1520 function that does or, conversely, from a function that does return
1521 a float to a function that doesn't; the necessary stack adjustment
1522 would not be executed. */
1523 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1524 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1527 /* If this call is indirect, we'll need to be able to use a call-clobbered
1528 register for the address of the target function. Make sure that all
1529 such registers are not used for passing parameters. */
1530 if (!decl && !TARGET_64BIT)
1532 int regparm = ix86_regparm;
1535 /* We're looking at the CALL_EXPR, we need the type of the function. */
1536 type = TREE_OPERAND (exp, 0); /* pointer expression */
1537 type = TREE_TYPE (type); /* pointer type */
1538 type = TREE_TYPE (type); /* function type */
/* An explicit regparm attribute overrides the global -mregparm value. */
1540 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1542 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1546 /* ??? Need to count the actual number of registers to be used,
1547 not the possible number of registers. Fix later. */
1552 /* Otherwise okay. That also includes certain types of indirect calls. */
1556 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1557 arguments as in struct attribute_spec.handler. */
/* Rejects the attribute on non-function types (with a warning), and
   diagnoses mutually-incompatible calling-convention combinations.
   NOTE(review): return type, remaining parameter declarations and braces
   are not visible in this excerpt. */
1559 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1562 tree args ATTRIBUTE_UNUSED;
1563 int flags ATTRIBUTE_UNUSED;
1566 if (TREE_CODE (*node) != FUNCTION_TYPE
1567 && TREE_CODE (*node) != METHOD_TYPE
1568 && TREE_CODE (*node) != FIELD_DECL
1569 && TREE_CODE (*node) != TYPE_DECL)
1571 warning ("`%s' attribute only applies to functions",
1572 IDENTIFIER_POINTER (name));
1573 *no_add_attrs = true;
/* fastcall conflicts with both stdcall and regparm. */
1577 if (is_attribute_p ("fastcall", name))
1579 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1581 error ("fastcall and stdcall attributes are not compatible");
1583 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1585 error ("fastcall and regparm attributes are not compatible");
/* stdcall likewise conflicts with fastcall. */
1588 else if (is_attribute_p ("stdcall", name))
1590 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1592 error ("fastcall and stdcall attributes are not compatible");
/* On 64-bit targets these attributes are ignored (no line visible here
   showing the TARGET_64BIT test -- confirm against the full file). */
1599 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1600 *no_add_attrs = true;
1606 /* Handle a "regparm" attribute;
1607 arguments as in struct attribute_spec.handler. */
/* Validates that the attribute is on a function type, that its single
   argument is an integer constant no larger than REGPARM_MAX, and that it
   is not combined with fastcall.
   NOTE(review): return type, remaining parameter declarations and braces
   are not visible in this excerpt. */
1609 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1613 int flags ATTRIBUTE_UNUSED;
1616 if (TREE_CODE (*node) != FUNCTION_TYPE
1617 && TREE_CODE (*node) != METHOD_TYPE
1618 && TREE_CODE (*node) != FIELD_DECL
1619 && TREE_CODE (*node) != TYPE_DECL)
1621 warning ("`%s' attribute only applies to functions",
1622 IDENTIFIER_POINTER (name));
1623 *no_add_attrs = true;
1629 cst = TREE_VALUE (args);
1630 if (TREE_CODE (cst) != INTEGER_CST)
1632 warning ("`%s' attribute requires an integer constant argument",
1633 IDENTIFIER_POINTER (name));
1634 *no_add_attrs = true;
1636 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1638 warning ("argument to `%s' attribute larger than %d",
1639 IDENTIFIER_POINTER (name), REGPARM_MAX);
1640 *no_add_attrs = true;
1643 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1645 error ("fastcall and regparm attributes are not compatible");
1652 /* Return 0 if the attributes for two types are incompatible, 1 if they
1653 are compatible, and 2 if they are nearly compatible (which causes a
1654 warning to be generated). */
/* NOTE(review): return type line, parameter declarations, braces and the
   `return 0/1' statements are not visible in this excerpt. */
1657 ix86_comp_type_attributes (type1, type2)
1661 /* Check for mismatch of non-default calling convention. */
1662 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1664 if (TREE_CODE (type1) != FUNCTION_TYPE)
1667 /* Check for mismatched fastcall types */
1668 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1669 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1672 /* Check for mismatched return types (cdecl vs stdcall). */
1673 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1674 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1679 /* Return the regparm value for a function with the indicated TYPE. */
/* Uses the type's explicit regparm attribute if present, otherwise the
   global -mregparm default.  NOTE(review): return type, parameter
   declaration and braces are not visible in this excerpt. */
1682 ix86_fntype_regparm (type)
1687 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1689 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1691 return ix86_regparm;
1694 /* Value is the number of bytes of arguments automatically
1695 popped when returning from a subroutine call.
1696 FUNDECL is the declaration node of the function (as a tree),
1697 FUNTYPE is the data type of the function (as a tree),
1698 or for a library call it is an identifier node for the subroutine name.
1699 SIZE is the number of bytes of arguments passed on the stack.
1701 On the 80386, the RTD insn may be used to pop them if the number
1702 of args is fixed, but if the number is variable then the caller
1703 must pop them all. RTD can't be used for library calls now
1704 because the library is compiled with the Unix compiler.
1705 Use of RTD is a selectable option, since it is incompatible with
1706 standard Unix calling sequences. If the option is not selected,
1707 the caller must always pop the args.
1709 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): return type, parameter declarations and several interior
   lines (including the returns of SIZE/0) are not visible in this
   excerpt. */
1712 ix86_return_pops_args (fundecl, funtype, size)
/* -mrtd applies only to real function types, never to library-call
   identifier nodes. */
1717 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1719 /* Cdecl functions override -mrtd, and never pop the stack. */
1720 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1722 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1723 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1724 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* Pop only when the arg list is fixed (ends in void_type_node). */
1728 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1729 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1730 == void_type_node)))
1734 /* Lose any fake structure return argument if it is passed on the stack. */
1735 if (aggregate_value_p (TREE_TYPE (funtype))
1738 int nregs = ix86_fntype_regparm (funtype);
/* The callee pops the hidden struct-return pointer (one word). */
1741 return GET_MODE_SIZE (Pmode);
1747 /* Argument support functions. */
1749 /* Return true when register may be used to pass function parameters. */
/* 32-bit path: the first REGPARM_MAX integer regs, plus SSE regs when SSE
   is enabled.  64-bit path: checked against the ABI's integer parameter
   register list, with RAX allowed as the hidden varargs argument.
   NOTE(review): return type, parameter declaration, braces and the
   TARGET_64BIT split are not all visible in this excerpt. */
1751 ix86_function_arg_regno_p (regno)
1756 return (regno < REGPARM_MAX
1757 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1758 if (SSE_REGNO_P (regno) && TARGET_SSE)
1760 /* RAX is used as hidden argument to va_arg functions. */
1763 for (i = 0; i < REGPARM_MAX; i++)
1764 if (regno == x86_64_int_parameter_registers[i])
1769 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1770 for a call to a function whose data type is FNTYPE.
1771 For a library call, FNTYPE is 0. */
/* NOTE(review): the opening brace, `*cum = zero_cum;' and some interior
   lines are not visible in this excerpt. */
1774 init_cumulative_args (cum, fntype, libname)
1775 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1776 tree fntype; /* tree ptr for function decl */
1777 rtx libname; /* SYMBOL_REF of library name or 0 */
1779 static CUMULATIVE_ARGS zero_cum;
1780 tree param, next_param;
/* Optional tracing of argument-passing decisions (-mdebug-arg). */
1782 if (TARGET_DEBUG_ARG)
1784 fprintf (stderr, "\ninit_cumulative_args (");
1786 fprintf (stderr, "fntype code = %s, ret code = %s",
1787 tree_code_name[(int) TREE_CODE (fntype)],
1788 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1790 fprintf (stderr, "no fntype");
1793 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1798 /* Set up the number of registers to use for passing arguments. */
1799 cum->nregs = ix86_regparm;
1800 cum->sse_nregs = SSE_REGPARM_MAX;
/* A regparm attribute on the 32-bit function type overrides -mregparm. */
1801 if (fntype && !TARGET_64BIT)
1803 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1806 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1808 cum->maybe_vaarg = false;
1810 /* Use ecx and edx registers if function has fastcall attribute */
1811 if (fntype && !TARGET_64BIT)
1813 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1821 /* Determine if this function has variable arguments. This is
1822 indicated by the last argument being 'void_type_mode' if there
1823 are no variable arguments. If there are variable arguments, then
1824 we won't pass anything in registers */
1828 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1829 param != 0; param = next_param)
1831 next_param = TREE_CHAIN (param);
1832 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1839 cum->maybe_vaarg = true;
/* No prototype at all (or a library call): must assume varargs. */
1843 if ((!fntype && !libname)
1844 || (fntype && !TYPE_ARG_TYPES (fntype)))
1845 cum->maybe_vaarg = 1;
1847 if (TARGET_DEBUG_ARG)
1848 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1853 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1854 of this code is to classify each 8bytes of incoming argument by the register
1855 class and assign registers accordingly. */
1857 /* Return the union class of CLASS1 and CLASS2.
1858 See the x86-64 PS ABI for details. */
/* Implements the psABI's pairwise class-merging rules in order; falls
   through to SSE when no earlier rule applies.  NOTE(review): braces and
   a few return statements are not visible in this excerpt. */
1860 static enum x86_64_reg_class
1861 merge_classes (class1, class2)
1862 enum x86_64_reg_class class1, class2;
1864 /* Rule #1: If both classes are equal, this is the resulting class. */
1865 if (class1 == class2)
1868 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1870 if (class1 == X86_64_NO_CLASS)
1872 if (class2 == X86_64_NO_CLASS)
1875 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1876 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1877 return X86_64_MEMORY_CLASS;
1879 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI so the cheaper SImode moves can be
   used (see the comment above enum x86_64_reg_class). */
1880 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1881 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1882 return X86_64_INTEGERSI_CLASS;
1883 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1884 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1885 return X86_64_INTEGER_CLASS;
1887 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1888 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1889 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1890 return X86_64_MEMORY_CLASS;
1892 /* Rule #6: Otherwise class SSE is used. */
1893 return X86_64_SSE_CLASS;
1896 /* Classify the argument of type TYPE and mode MODE.
1897 CLASSES will be filled by the register class used to pass each word
1898 of the operand. The number of words is returned. In case the parameter
1899 should be passed in memory, 0 is returned. As a special case for zero
1900 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1902 BIT_OFFSET is used internally for handling records and specifies offset
1903 of the offset in bits modulo 256 to avoid overflow cases.
1905 See the x86-64 PS ABI for details.
1909 classify_argument (mode, type, classes, bit_offset)
1910 enum machine_mode mode;
1912 enum x86_64_reg_class classes[MAX_CLASSES];
/* Size of the argument in bytes; BLKmode aggregates use the tree size.  */
1916 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1917 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1919 /* Variable sized entities are always passed/returned in memory. */
1923 if (type && AGGREGATE_TYPE_P (type))
1927 enum x86_64_reg_class subclasses[MAX_CLASSES];
1929 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
/* Start every eightbyte as NO_CLASS; fields merged below refine this.  */
1933 for (i = 0; i < words; i++)
1934 classes[i] = X86_64_NO_CLASS;
1936 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1937 signalize memory class, so handle it as special case. */
1940 classes[0] = X86_64_NO_CLASS;
1944 /* Classify each field of record and merge classes. */
1945 if (TREE_CODE (type) == RECORD_TYPE)
1947 /* For classes first merge in the field of the subclasses. */
/* C++ base classes: recurse into each BINFO base at its bit offset.  */
1948 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1950 tree bases = TYPE_BINFO_BASETYPES (type);
1951 int n_bases = TREE_VEC_LENGTH (bases);
1954 for (i = 0; i < n_bases; ++i)
1956 tree binfo = TREE_VEC_ELT (bases, i);
1958 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1959 tree type = BINFO_TYPE (binfo);
1961 num = classify_argument (TYPE_MODE (type),
1963 (offset + bit_offset) % 256);
1966 for (i = 0; i < num; i++)
/* Position of the base's first eightbyte within CLASSES.  */
1968 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1970 merge_classes (subclasses[i], classes[i + pos]);
1974 /* And now merge the fields of structure. */
1975 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1977 if (TREE_CODE (field) == FIELD_DECL)
1981 /* Bitfields are always classified as integer. Handle them
1982 early, since later code would consider them to be
1983 misaligned integers. */
1984 if (DECL_BIT_FIELD (field))
/* Mark every eightbyte the bitfield touches as INTEGER.  */
1986 for (i = int_bit_position (field) / 8 / 8;
1987 i < (int_bit_position (field)
1988 + tree_low_cst (DECL_SIZE (field), 0)
1991 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bitfield: classify the field's own type recursively and merge.  */
1996 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1997 TREE_TYPE (field), subclasses,
1998 (int_bit_position (field)
1999 + bit_offset) % 256);
2002 for (i = 0; i < num; i++)
2005 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2007 merge_classes (subclasses[i], classes[i + pos]);
2013 /* Arrays are handled as small records. */
2014 else if (TREE_CODE (type) == ARRAY_TYPE)
/* Classify one element, then replicate its pattern across all words.  */
2017 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2018 TREE_TYPE (type), subclasses, bit_offset);
2022 /* The partial classes are now full classes. */
2023 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2024 subclasses[0] = X86_64_SSE_CLASS;
2025 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2026 subclasses[0] = X86_64_INTEGER_CLASS;
2028 for (i = 0; i < words; i++)
2029 classes[i] = subclasses[i % num];
2031 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2032 else if (TREE_CODE (type) == UNION_TYPE
2033 || TREE_CODE (type) == QUAL_UNION_TYPE)
2035 /* For classes first merge in the field of the subclasses. */
2036 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2038 tree bases = TYPE_BINFO_BASETYPES (type);
2039 int n_bases = TREE_VEC_LENGTH (bases);
2042 for (i = 0; i < n_bases; ++i)
2044 tree binfo = TREE_VEC_ELT (bases, i);
2046 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2047 tree type = BINFO_TYPE (binfo);
2049 num = classify_argument (TYPE_MODE (type),
2051 (offset + (bit_offset % 64)) % 256);
2054 for (i = 0; i < num; i++)
2056 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2058 merge_classes (subclasses[i], classes[i + pos]);
/* All union members overlay at offset 0, so merge directly.  */
2062 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2064 if (TREE_CODE (field) == FIELD_DECL)
2067 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2068 TREE_TYPE (field), subclasses,
2072 for (i = 0; i < num; i++)
2073 classes[i] = merge_classes (subclasses[i], classes[i]);
2080 /* Final merger cleanup. */
2081 for (i = 0; i < words; i++)
2083 /* If one class is MEMORY, everything should be passed in
2085 if (classes[i] == X86_64_MEMORY_CLASS)
2088 /* The X86_64_SSEUP_CLASS should be always preceded by
2089 X86_64_SSE_CLASS. */
2090 if (classes[i] == X86_64_SSEUP_CLASS
2091 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2092 classes[i] = X86_64_SSE_CLASS;
2094 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2095 if (classes[i] == X86_64_X87UP_CLASS
2096 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2097 classes[i] = X86_64_SSE_CLASS;
2102 /* Compute alignment needed. We align all types to natural boundaries with
2103 exception of XFmode that is aligned to 64bits. */
2104 if (mode != VOIDmode && mode != BLKmode)
2106 int mode_alignment = GET_MODE_BITSIZE (mode);
2109 mode_alignment = 128;
2110 else if (mode == XCmode)
2111 mode_alignment = 256;
2112 /* Misaligned fields are always returned in memory. */
2113 if (bit_offset % mode_alignment)
2117 /* Classification of atomic types. */
/* Scalar modes: pick classes directly from the mode and bit offset.
   NOTE(review): the switch labels for these cases are not visible in
   this view; the assignments below group by mode per the psABI.  */
2127 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2128 classes[0] = X86_64_INTEGERSI_CLASS;
2130 classes[0] = X86_64_INTEGER_CLASS;
2134 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2137 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2138 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2141 if (!(bit_offset % 64))
2142 classes[0] = X86_64_SSESF_CLASS;
2144 classes[0] = X86_64_SSE_CLASS;
2147 classes[0] = X86_64_SSEDF_CLASS;
2150 classes[0] = X86_64_X87_CLASS;
2151 classes[1] = X86_64_X87UP_CLASS;
2154 classes[0] = X86_64_X87_CLASS;
2155 classes[1] = X86_64_X87UP_CLASS;
2156 classes[2] = X86_64_X87_CLASS;
2157 classes[3] = X86_64_X87UP_CLASS;
2160 classes[0] = X86_64_SSEDF_CLASS;
2161 classes[1] = X86_64_SSEDF_CLASS;
2164 classes[0] = X86_64_SSE_CLASS;
2172 classes[0] = X86_64_SSE_CLASS;
2173 classes[1] = X86_64_SSEUP_CLASS;
2188 /* Examine the argument and return set number of register required in each
2189 class. Return 0 iff parameter should be passed in memory. */
/* Counts how many integer (*INT_NREGS) and SSE (*SSE_NREGS) registers the
   argument consumes, by tallying the per-eightbyte classes produced by
   classify_argument.  */
2191 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2192 enum machine_mode mode;
2194 int *int_nregs, *sse_nregs;
2197 enum x86_64_reg_class class[MAX_CLASSES];
2198 int n = classify_argument (mode, type, class, 0);
/* Walk the classes from last to first, bumping the matching counter.  */
2204 for (n--; n >= 0; n--)
2207 case X86_64_INTEGER_CLASS:
2208 case X86_64_INTEGERSI_CLASS:
2211 case X86_64_SSE_CLASS:
2212 case X86_64_SSESF_CLASS:
2213 case X86_64_SSEDF_CLASS:
2216 case X86_64_NO_CLASS:
2217 case X86_64_SSEUP_CLASS:
2219 case X86_64_X87_CLASS:
2220 case X86_64_X87UP_CLASS:
/* X87/X87UP in an argument (not return) forces memory passing; the
   handling for that distinction is on lines elided from this view.  */
2224 case X86_64_MEMORY_CLASS:
2229 /* Construct container for the argument used by GCC interface. See
2230 FUNCTION_ARG for the detailed description. */
/* Build the RTL describing where an argument lives: a single REG for the
   simple cases, or a PARALLEL of (reg, offset) EXPR_LISTs when the value
   is split across integer and SSE registers.  Returns NULL when the
   argument does not fit in the available registers (i.e. goes on stack).  */
2232 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2233 enum machine_mode mode;
2236 int nintregs, nsseregs;
2240 enum machine_mode tmpmode;
2242 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2243 enum x86_64_reg_class class[MAX_CLASSES];
2247 int needed_sseregs, needed_intregs;
2248 rtx exp[MAX_CLASSES];
2251 n = classify_argument (mode, type, class, 0);
2252 if (TARGET_DEBUG_ARG)
2255 fprintf (stderr, "Memory class\n");
2258 fprintf (stderr, "Classes:");
2259 for (i = 0; i < n; i++)
2261 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2263 fprintf (stderr, "\n");
/* Bail out (stack passing) if classification says memory, or if more
   registers are required than remain available.  */
2268 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2270 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2273 /* First construct simple cases. Avoid SCmode, since we want to use
2274 single register to pass this type. */
2275 if (n == 1 && mode != SCmode)
2278 case X86_64_INTEGER_CLASS:
2279 case X86_64_INTEGERSI_CLASS:
2280 return gen_rtx_REG (mode, intreg[0]);
2281 case X86_64_SSE_CLASS:
2282 case X86_64_SSESF_CLASS:
2283 case X86_64_SSEDF_CLASS:
2284 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2285 case X86_64_X87_CLASS:
2286 return gen_rtx_REG (mode, FIRST_STACK_REG);
2287 case X86_64_NO_CLASS:
2288 /* Zero sized array, struct or class. */
/* Two- and four-eightbyte fast paths that still fit a single REG.  */
2293 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2294 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2296 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2297 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2298 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2299 && class[1] == X86_64_INTEGER_CLASS
2300 && (mode == CDImode || mode == TImode)
2301 && intreg[0] + 1 == intreg[1])
2302 return gen_rtx_REG (mode, intreg[0]);
2304 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2305 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2306 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2308 /* Otherwise figure out the entries of the PARALLEL. */
2309 for (i = 0; i < n; i++)
2313 case X86_64_NO_CLASS:
2315 case X86_64_INTEGER_CLASS:
2316 case X86_64_INTEGERSI_CLASS:
2317 /* Merge TImodes on aligned occasions here too. */
/* Trailing partial eightbyte: pick the smallest integer mode that
   still covers the remaining bytes.  */
2318 if (i * 8 + 8 > bytes)
2319 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2320 else if (class[i] == X86_64_INTEGERSI_CLASS)
2324 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2325 if (tmpmode == BLKmode)
2327 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2328 gen_rtx_REG (tmpmode, *intreg),
2332 case X86_64_SSESF_CLASS:
2333 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2334 gen_rtx_REG (SFmode,
2335 SSE_REGNO (sse_regno)),
2339 case X86_64_SSEDF_CLASS:
2340 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2341 gen_rtx_REG (DFmode,
2342 SSE_REGNO (sse_regno)),
2346 case X86_64_SSE_CLASS:
/* SSE followed by SSEUP means a full 16-byte (TImode) chunk.  */
2347 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2351 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2352 gen_rtx_REG (tmpmode,
2353 SSE_REGNO (sse_regno)),
2355 if (tmpmode == TImode)
/* Assemble the collected pieces into the final PARALLEL.  */
2363 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2364 for (i = 0; i < nexps; i++)
2365 XVECEXP (ret, 0, i) = exp [i];
2369 /* Update the data in CUM to advance over an argument
2370 of mode MODE and data type TYPE.
2371 (TYPE is null for libcalls where that information may not be available.) */
2374 function_arg_advance (cum, mode, type, named)
2375 CUMULATIVE_ARGS *cum; /* current arg information */
2376 enum machine_mode mode; /* current arg mode */
2377 tree type; /* type of the argument or 0 if lib support */
2378 int named; /* whether or not the argument was named */
2381 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2382 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2384 if (TARGET_DEBUG_ARG)
2386 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2387 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
/* 64-bit ABI: consume int/SSE registers if the whole argument fits,
   otherwise charge it to the stack word counter.  */
2390 int int_nregs, sse_nregs;
2391 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2392 cum->words += words;
2393 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2395 cum->nregs -= int_nregs;
2396 cum->sse_nregs -= sse_nregs;
2397 cum->regno += int_nregs;
2398 cum->sse_regno += sse_nregs;
2401 cum->words += words;
/* 32-bit path: TImode SSE vectors occupy one SSE register.  */
2405 if (TARGET_SSE && mode == TImode)
2407 cum->sse_words += words;
2408 cum->sse_nregs -= 1;
2409 cum->sse_regno += 1;
2410 if (cum->sse_nregs <= 0)
/* Integer (regparm) path: each word takes one GP register.  */
2418 cum->words += words;
2419 cum->nregs -= words;
2420 cum->regno += words;
2422 if (cum->nregs <= 0)
2432 /* Define where to put the arguments to a function.
2433 Value is zero to push the argument on the stack,
2434 or a hard register in which to store the argument.
2436 MODE is the argument's machine mode.
2437 TYPE is the data type of the argument (as a tree).
2438 This is null for libcalls where that information may
2440 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2441 the preceding args and about the function being called.
2442 NAMED is nonzero if this argument is a named parameter
2443 (otherwise it is an extra parameter matching an ellipsis). */
2446 function_arg (cum, mode, type, named)
2447 CUMULATIVE_ARGS *cum; /* current arg information */
2448 enum machine_mode mode; /* current arg mode */
2449 tree type; /* type of the argument or 0 if lib support */
2450 int named; /* != 0 for normal args, == 0 for ... args */
2454 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2455 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2457 /* Handle a hidden AL argument containing number of registers for varargs
2458 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2460 if (mode == VOIDmode)
2463 return GEN_INT (cum->maybe_vaarg
2464 ? (cum->sse_nregs < 0
/* 64-bit: delegate the register/PARALLEL decision entirely to
   construct_container.  */
2472 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2473 &x86_64_int_parameter_registers [cum->regno],
2478 /* For now, pass fp/complex values on the stack. */
2487 if (words <= cum->nregs)
2489 int regno = cum->regno;
2491 /* Fastcall allocates the first two DWORD (SImode) or
2492 smaller arguments to ECX and EDX. */
2495 if (mode == BLKmode || mode == DImode)
2498 /* ECX not EAX is the first allocated register. */
2502 ret = gen_rtx_REG (mode, regno);
/* SSE vector arguments go in the current SSE register.  */
2507 ret = gen_rtx_REG (mode, cum->sse_regno);
2511 if (TARGET_DEBUG_ARG)
2514 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2515 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2518 print_simple_rtl (stderr, ret);
2520 fprintf (stderr, ", stack");
2522 fprintf (stderr, " )\n");
2528 /* A C expression that indicates when an argument must be passed by
2529 reference. If nonzero for an argument, a copy of that argument is
2530 made in memory and a pointer to the argument is passed instead of
2531 the argument itself. The pointer is passed in whatever way is
2532 appropriate for passing a pointer to that type. */
2535 function_arg_pass_by_reference (cum, mode, type, named)
2536 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2537 enum machine_mode mode ATTRIBUTE_UNUSED;
2539 int named ATTRIBUTE_UNUSED;
/* Variable-sized types (int_size_in_bytes == -1) must go by reference.  */
2544 if (type && int_size_in_bytes (type) == -1)
2546 if (TARGET_DEBUG_ARG)
2547 fprintf (stderr, "function_arg_pass_by_reference\n");
2554 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Recursive walk: true if TYPE is, or transitively contains, an SSE
   vector requiring 128-bit alignment.  Used by ix86_function_arg_boundary
   to decide whether a 32-bit aggregate argument needs 16-byte alignment.  */
2557 contains_128bit_aligned_vector_p (type)
2560 enum machine_mode mode = TYPE_MODE (type);
2561 if (SSE_REG_MODE_P (mode)
2562 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
/* Types already aligned below 128 bits cannot impose the requirement.  */
2564 if (TYPE_ALIGN (type) < 128)
2567 if (AGGREGATE_TYPE_P (type))
2569 /* Walk the agregates recursivly. */
2570 if (TREE_CODE (type) == RECORD_TYPE
2571 || TREE_CODE (type) == UNION_TYPE
2572 || TREE_CODE (type) == QUAL_UNION_TYPE)
2576 if (TYPE_BINFO (type) != NULL
2577 && TYPE_BINFO_BASETYPES (type) != NULL)
2579 tree bases = TYPE_BINFO_BASETYPES (type);
2580 int n_bases = TREE_VEC_LENGTH (bases);
2583 for (i = 0; i < n_bases; ++i)
2585 tree binfo = TREE_VEC_ELT (bases, i);
2586 tree type = BINFO_TYPE (binfo);
2588 if (contains_128bit_aligned_vector_p (type))
2592 /* And now merge the fields of structure. */
2593 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2595 if (TREE_CODE (field) == FIELD_DECL
2596 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2600 /* Just for use if some languages passes arrays by value. */
2601 else if (TREE_CODE (type) == ARRAY_TYPE)
2603 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2612 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2616 ix86_function_arg_boundary (mode, type)
2617 enum machine_mode mode;
/* Natural alignment from the type when available, else from the mode.  */
2622 align = TYPE_ALIGN (type);
2624 align = GET_MODE_ALIGNMENT (mode);
2625 if (align < PARM_BOUNDARY)
2626 align = PARM_BOUNDARY;
2629 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2630 make an exception for SSE modes since these require 128bit
2633 The handling here differs from field_alignment. ICC aligns MMX
2634 arguments to 4 byte boundaries, while structure fields are aligned
2635 to 8 byte boundaries. */
2638 if (!SSE_REG_MODE_P (mode))
2639 align = PARM_BOUNDARY;
/* Aggregates only keep extra alignment when they hold an SSE vector.  */
2643 if (!contains_128bit_aligned_vector_p (type))
2644 align = PARM_BOUNDARY;
2646 if (align != PARM_BOUNDARY && !TARGET_SSE)
2654 /* Return true if N is a possible register number of function value. */
2656 ix86_function_value_regno_p (regno)
/* First branch (visible below) appears to be the TARGET_64BIT case,
   the second the 32-bit case — the guarding condition is elided here.  */
2661 return ((regno) == 0
2662 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2663 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2665 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2666 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2667 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2670 /* Define how to find the value returned by a function.
2671 VALTYPE is the data type of the value (as a tree).
2672 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2673 otherwise, FUNC is 0. */
2675 ix86_function_value (valtype)
/* 64-bit: reuse construct_container with the return-register tables.  */
2680 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2681 REGPARM_MAX, SSE_REGPARM_MAX,
2682 x86_64_int_return_registers, 0);
2683 /* For zero sized structures, construct_container return NULL, but we need
2684 to keep rest of compiler happy by returning meaningful value. */
2686 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
/* 32-bit: single return register chosen by ix86_value_regno.  */
2690 return gen_rtx_REG (TYPE_MODE (valtype),
2691 ix86_value_regno (TYPE_MODE (valtype)));
2694 /* Return false iff type is returned in memory. */
2696 ix86_return_in_memory (type)
2699 int needed_intregs, needed_sseregs;
/* 64-bit: in memory exactly when examine_argument reports no registers.  */
2702 return !examine_argument (TYPE_MODE (type), type, 1,
2703 &needed_intregs, &needed_sseregs);
2707 if (TYPE_MODE (type) == BLKmode)
/* MS ABI compatibility: small aggregates (<= 8 bytes) come back in regs.  */
2709 else if (MS_AGGREGATE_RETURN
2710 && AGGREGATE_TYPE_P (type)
2711 && int_size_in_bytes(type) <= 8)
2713 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2714 && int_size_in_bytes (type) == 8)
2715 || (int_size_in_bytes (type) > 12
2716 && TYPE_MODE (type) != TImode
2717 && TYPE_MODE (type) != TFmode
2718 && !VECTOR_MODE_P (TYPE_MODE (type))))
2724 /* Define how to find the value returned by a library function
2725 assuming the value has mode MODE. */
2727 ix86_libcall_value (mode)
2728 enum machine_mode mode;
/* 64-bit branch: the mode dispatch (switch/ifs) selecting among SSE,
   x87 and GP registers is partly elided in this view.  */
2738 return gen_rtx_REG (mode, FIRST_SSE_REG);
2741 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2743 return gen_rtx_REG (mode, 0);
/* 32-bit: defer to ix86_value_regno, as ix86_function_value does.  */
2747 return gen_rtx_REG (mode, ix86_value_regno (mode));
2750 /* Given a mode, return the register to use for a return value. */
2753 ix86_value_regno (mode)
2754 enum machine_mode mode;
/* Floats in ST(0) when the 80387 return convention is active,
   TImode/vectors in XMM0, everything else in EAX (regno 0).  */
2756 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2757 return FIRST_FLOAT_REG;
2758 if (mode == TImode || VECTOR_MODE_P (mode))
2759 return FIRST_SSE_REG;
2763 /* Create the va_list data type. */
2766 ix86_build_va_list ()
2768 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2770 /* For i386 we use plain pointer to argument area. */
2772 return build_pointer_type (char_type_node);
/* x86-64: build the four-field __va_list_tag record mandated by the
   psABI: gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
2774 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2775 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2777 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2778 unsigned_type_node);
2779 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2780 unsigned_type_node);
2781 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2783 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2786 DECL_FIELD_CONTEXT (f_gpr) = record;
2787 DECL_FIELD_CONTEXT (f_fpr) = record;
2788 DECL_FIELD_CONTEXT (f_ovf) = record;
2789 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain the fields in declaration order and lay the record out.  */
2791 TREE_CHAIN (record) = type_decl;
2792 TYPE_NAME (record) = type_decl;
2793 TYPE_FIELDS (record) = f_gpr;
2794 TREE_CHAIN (f_gpr) = f_fpr;
2795 TREE_CHAIN (f_fpr) = f_ovf;
2796 TREE_CHAIN (f_ovf) = f_sav;
2798 layout_type (record);
2800 /* The correct type is an array type of one element. */
2801 return build_array_type (record, build_index_type (size_zero_node));
2804 /* Perform any needed actions needed for a function that is receiving a
2805 variable number of arguments.
2809 MODE and TYPE are the mode and type of the current parameter.
2811 PRETEND_SIZE is a variable that should be set to the amount of stack
2812 that must be pushed by the prolog to pretend that our caller pushed
2815 Normally, this macro will push all remaining incoming registers on the
2816 stack and set PRETEND_SIZE to the length of the registers pushed. */
2819 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2820 CUMULATIVE_ARGS *cum;
2821 enum machine_mode mode;
2823 int *pretend_size ATTRIBUTE_UNUSED;
2827 CUMULATIVE_ARGS next_cum;
2828 rtx save_area = NULL_RTX, mem;
2841 /* Indicate to allocate space on the stack for varargs save area. */
2842 ix86_save_varrargs_registers = 1;
/* Distinguish stdarg (last named arg skipped) from old-style varargs.  */
2844 fntype = TREE_TYPE (current_function_decl);
2845 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2846 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2847 != void_type_node));
2849 /* For varargs, we do not want to skip the dummy va_dcl argument.
2850 For stdargs, we do want to skip the last named argument. */
2853 function_arg_advance (&next_cum, mode, type, 1);
2856 save_area = frame_pointer_rtx;
2858 set = get_varargs_alias_set ();
/* Spill the remaining unnamed integer parameter registers into the
   register save area, one Pmode word per register.  */
2860 for (i = next_cum.regno; i < ix86_regparm; i++)
2862 mem = gen_rtx_MEM (Pmode,
2863 plus_constant (save_area, i * UNITS_PER_WORD));
2864 set_mem_alias_set (mem, set);
2865 emit_move_insn (mem, gen_rtx_REG (Pmode,
2866 x86_64_int_parameter_registers[i]));
2869 if (next_cum.sse_nregs)
2871 /* Now emit code to save SSE registers. The AX parameter contains number
2872 of SSE parameter registers used to call this function. We use
2873 sse_prologue_save insn template that produces computed jump across
2874 SSE saves. We need some preparation work to get this working. */
2876 label = gen_label_rtx ();
2877 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2879 /* Compute address to jump to :
2880 label - 5*eax + nnamed_sse_arguments*5 */
2881 tmp_reg = gen_reg_rtx (Pmode);
2882 nsse_reg = gen_reg_rtx (Pmode);
/* AL holds the caller's count of SSE registers used; zero-extend it.  */
2883 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2884 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2885 gen_rtx_MULT (Pmode, nsse_reg,
2887 if (next_cum.sse_regno)
2890 gen_rtx_CONST (DImode,
2891 gen_rtx_PLUS (DImode,
2893 GEN_INT (next_cum.sse_regno * 4))));
2895 emit_move_insn (nsse_reg, label_ref);
2896 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2898 /* Compute address of memory block we save into. We always use pointer
2899 pointing 127 bytes after first byte to store - this is needed to keep
2900 instruction size limited by 4 bytes. */
2901 tmp_reg = gen_reg_rtx (Pmode);
2902 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2903 plus_constant (save_area,
2904 8 * REGPARM_MAX + 127)));
2905 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2906 set_mem_alias_set (mem, set);
2907 set_mem_align (mem, BITS_PER_WORD);
2909 /* And finally do the dirty job! */
2910 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2911 GEN_INT (next_cum.sse_regno), label));
2916 /* Implement va_start. */
2919 ix86_va_start (valist, nextarg)
2923 HOST_WIDE_INT words, n_gpr, n_fpr;
2924 tree f_gpr, f_fpr, f_ovf, f_sav;
2925 tree gpr, fpr, ovf, sav, t;
2927 /* Only 64bit target needs something special. */
2930 std_expand_builtin_va_start (valist, nextarg);
/* Pull the four fields out of the __va_list_tag record built by
   ix86_build_va_list (same order: gpr, fpr, ovf, sav).  */
2934 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2935 f_fpr = TREE_CHAIN (f_gpr);
2936 f_ovf = TREE_CHAIN (f_fpr);
2937 f_sav = TREE_CHAIN (f_ovf);
2939 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2940 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2941 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2942 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2943 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2945 /* Count number of gp and fp argument registers used. */
2946 words = current_function_args_info.words;
2947 n_gpr = current_function_args_info.regno;
2948 n_fpr = current_function_args_info.sse_regno;
2950 if (TARGET_DEBUG_ARG)
2951 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2952 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset = bytes of GP save area already consumed (8 per register).  */
2954 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2955 build_int_2 (n_gpr * 8, 0));
2956 TREE_SIDE_EFFECTS (t) = 1;
2957 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts past the full GP area; 16 bytes per SSE register.  */
2959 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2960 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2961 TREE_SIDE_EFFECTS (t) = 1;
2962 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2964 /* Find the overflow area. */
2965 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2967 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2968 build_int_2 (words * UNITS_PER_WORD, 0));
2969 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2970 TREE_SIDE_EFFECTS (t) = 1;
2971 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2973 /* Find the register save area.
2974 Prologue of the function save it right above stack frame. */
2975 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2976 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2977 TREE_SIDE_EFFECTS (t) = 1;
2978 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2981 /* Implement va_arg. */
2983 ix86_va_arg (valist, type)
2986 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2987 tree f_gpr, f_fpr, f_ovf, f_sav;
2988 tree gpr, fpr, ovf, sav, t;
2990 rtx lab_false, lab_over = NULL_RTX;
2995 /* Only 64bit target needs something special. */
2998 return std_expand_builtin_va_arg (valist, type);
/* Decompose the va_list record exactly as in ix86_va_start.  */
3001 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3002 f_fpr = TREE_CHAIN (f_gpr);
3003 f_ovf = TREE_CHAIN (f_fpr);
3004 f_sav = TREE_CHAIN (f_ovf);
3006 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3007 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3008 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3009 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3010 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3012 size = int_size_in_bytes (type);
3015 /* Passed by reference. */
3017 type = build_pointer_type (type);
3018 size = int_size_in_bytes (type);
3020 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* NULL container means the value lives only in the overflow area.  */
3022 container = construct_container (TYPE_MODE (type), type, 0,
3023 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3025 * Pull the value out of the saved registers ...
3028 addr_rtx = gen_reg_rtx (Pmode);
3032 rtx int_addr_rtx, sse_addr_rtx;
3033 int needed_intregs, needed_sseregs;
3036 lab_over = gen_label_rtx ();
3037 lab_false = gen_label_rtx ();
3039 examine_argument (TYPE_MODE (type), type, 0,
3040 &needed_intregs, &needed_sseregs);
/* A temporary is needed when the save-area copy cannot be addressed
   directly: over-aligned types, or registers that are not contiguous.  */
3043 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3044 || TYPE_ALIGN (type) > 128);
3046 /* In case we are passing structure, verify that it is consecutive block
3047 on the register save area. If not we need to do moves. */
3048 if (!need_temp && !REG_P (container))
3050 /* Verify that all registers are strictly consecutive */
3051 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3055 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3057 rtx slot = XVECEXP (container, 0, i);
3058 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3059 || INTVAL (XEXP (slot, 1)) != i * 16)
3067 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3069 rtx slot = XVECEXP (container, 0, i);
3070 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3071 || INTVAL (XEXP (slot, 1)) != i * 8)
3078 int_addr_rtx = addr_rtx;
3079 sse_addr_rtx = addr_rtx;
3083 int_addr_rtx = gen_reg_rtx (Pmode);
3084 sse_addr_rtx = gen_reg_rtx (Pmode);
3086 /* First ensure that we fit completely in registers. */
/* Jump to lab_false (stack path) if gp_offset/fp_offset show the save
   area is exhausted for this argument.  */
3089 emit_cmp_and_jump_insns (expand_expr
3090 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3091 GEN_INT ((REGPARM_MAX - needed_intregs +
3092 1) * 8), GE, const1_rtx, SImode,
3097 emit_cmp_and_jump_insns (expand_expr
3098 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3099 GEN_INT ((SSE_REGPARM_MAX -
3100 needed_sseregs + 1) * 16 +
3101 REGPARM_MAX * 8), GE, const1_rtx,
3102 SImode, 1, lab_false);
3105 /* Compute index to start of area used for integer regs. */
3108 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3109 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3110 if (r != int_addr_rtx)
3111 emit_move_insn (int_addr_rtx, r);
3115 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3116 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3117 if (r != sse_addr_rtx)
3118 emit_move_insn (sse_addr_rtx, r);
3125 /* Never use the memory itself, as it has the alias set. */
/* need_temp path: copy each register slot from the save area into a
   stack temporary, then hand back the temporary's address.  */
3126 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3127 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3128 set_mem_alias_set (mem, get_varargs_alias_set ());
3129 set_mem_align (mem, BITS_PER_UNIT);
3131 for (i = 0; i < XVECLEN (container, 0); i++)
3133 rtx slot = XVECEXP (container, 0, i);
3134 rtx reg = XEXP (slot, 0);
3135 enum machine_mode mode = GET_MODE (reg);
3141 if (SSE_REGNO_P (REGNO (reg)))
3143 src_addr = sse_addr_rtx;
3144 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3148 src_addr = int_addr_rtx;
3149 src_offset = REGNO (reg) * 8;
3151 src_mem = gen_rtx_MEM (mode, src_addr);
3152 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3153 src_mem = adjust_address (src_mem, mode, src_offset);
3154 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3155 emit_move_insn (dest_mem, src_mem);
/* Bump gp_offset / fp_offset past the registers just consumed.  */
3162 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3163 build_int_2 (needed_intregs * 8, 0));
3164 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3165 TREE_SIDE_EFFECTS (t) = 1;
3166 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3171 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3172 build_int_2 (needed_sseregs * 16, 0));
3173 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3174 TREE_SIDE_EFFECTS (t) = 1;
3175 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3178 emit_jump_insn (gen_jump (lab_over));
3180 emit_label (lab_false);
3183 /* ... otherwise out of the overflow area. */
3185 /* Care for on-stack alignment if needed. */
3186 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
/* Round the overflow pointer up to the argument's boundary.  */
3190 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3191 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3192 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3196 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3198 emit_move_insn (addr_rtx, r);
/* Advance overflow_arg_area past this argument.  */
3201 build (PLUS_EXPR, TREE_TYPE (t), t,
3202 build_int_2 (rsize * UNITS_PER_WORD, 0));
3203 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3204 TREE_SIDE_EFFECTS (t) = 1;
3205 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3208 emit_label (lab_over);
/* By-reference values: dereference the pointer we just fetched.  */
3212 r = gen_rtx_MEM (Pmode, addr_rtx);
3213 set_mem_alias_set (r, get_varargs_alias_set ());
3214 emit_move_insn (addr_rtx, r);
3220 /* Return nonzero if OP is either a i387 or SSE fp register. */
3222 any_fp_register_operand (op, mode)
3224 enum machine_mode mode ATTRIBUTE_UNUSED;
3226 return ANY_FP_REG_P (op);
3229 /* Return nonzero if OP is an i387 fp register. */
3231 fp_register_operand (op, mode)
3233 enum machine_mode mode ATTRIBUTE_UNUSED;
3235 return FP_REG_P (op);
3238 /* Return nonzero if OP is a non-fp register_operand. */
/* Excludes both i387 and SSE registers, unlike the _fp_ variant below.  */
3240 register_and_not_any_fp_reg_operand (op, mode)
3242 enum machine_mode mode;
3244 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3247 /* Return nonzero if OP is a register operand other than an
3248 i387 fp register. */
/* SSE registers are still accepted here; only x87 stack regs are excluded.  */
3250 register_and_not_fp_reg_operand (op, mode)
3252 enum machine_mode mode;
3254 return register_operand (op, mode) && !FP_REG_P (op);
3257 /* Return nonzero if OP is general operand representable on x86_64. */
3260 x86_64_general_operand (op, mode)
3262 enum machine_mode mode;
3265 return general_operand (op, mode);
3266 if (nonimmediate_operand (op, mode))
3268 return x86_64_sign_extended_value (op);
3271 /* Return nonzero if OP is general operand representable on x86_64
3272 as either sign extended or zero extended constant. */
3275 x86_64_szext_general_operand (op, mode)
3277 enum machine_mode mode;
3280 return general_operand (op, mode);
3281 if (nonimmediate_operand (op, mode))
3283 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3286 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3289 x86_64_nonmemory_operand (op, mode)
3291 enum machine_mode mode;
3294 return nonmemory_operand (op, mode);
3295 if (register_operand (op, mode))
3297 return x86_64_sign_extended_value (op);
3300 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3303 x86_64_movabs_operand (op, mode)
3305 enum machine_mode mode;
3307 if (!TARGET_64BIT || !flag_pic)
3308 return nonmemory_operand (op, mode);
3309 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3311 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3316 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3319 x86_64_szext_nonmemory_operand (op, mode)
3321 enum machine_mode mode;
3324 return nonmemory_operand (op, mode);
3325 if (register_operand (op, mode))
3327 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3330 /* Return nonzero if OP is immediate operand representable on x86_64. */
3333 x86_64_immediate_operand (op, mode)
3335 enum machine_mode mode;
3338 return immediate_operand (op, mode);
3339 return x86_64_sign_extended_value (op);
3342 /* Return nonzero if OP is immediate operand representable on x86_64. */
3345 x86_64_zext_immediate_operand (op, mode)
3347 enum machine_mode mode ATTRIBUTE_UNUSED;
3349 return x86_64_zero_extended_value (op);
3352 /* Return nonzero if OP is (const_int 1), else return zero. */
3355 const_int_1_operand (op, mode)
3357 enum machine_mode mode ATTRIBUTE_UNUSED;
3359 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3362 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3363 for shift & compare patterns, as shifting by 0 does not change flags),
3364 else return zero. */
3367 const_int_1_31_operand (op, mode)
3369 enum machine_mode mode ATTRIBUTE_UNUSED;
3371 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3374 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3375 reference and a constant. */
3378 symbolic_operand (op, mode)
3380 enum machine_mode mode ATTRIBUTE_UNUSED;
3382 switch (GET_CODE (op))
3390 if (GET_CODE (op) == SYMBOL_REF
3391 || GET_CODE (op) == LABEL_REF
3392 || (GET_CODE (op) == UNSPEC
3393 && (XINT (op, 1) == UNSPEC_GOT
3394 || XINT (op, 1) == UNSPEC_GOTOFF
3395 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3397 if (GET_CODE (op) != PLUS
3398 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3402 if (GET_CODE (op) == SYMBOL_REF
3403 || GET_CODE (op) == LABEL_REF)
3405 /* Only @GOTOFF gets offsets. */
3406 if (GET_CODE (op) != UNSPEC
3407 || XINT (op, 1) != UNSPEC_GOTOFF)
3410 op = XVECEXP (op, 0, 0);
3411 if (GET_CODE (op) == SYMBOL_REF
3412 || GET_CODE (op) == LABEL_REF)
3421 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3424 pic_symbolic_operand (op, mode)
3426 enum machine_mode mode ATTRIBUTE_UNUSED;
3428 if (GET_CODE (op) != CONST)
3433 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3438 if (GET_CODE (op) == UNSPEC)
3440 if (GET_CODE (op) != PLUS
3441 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3444 if (GET_CODE (op) == UNSPEC)
3450 /* Return true if OP is a symbolic operand that resolves locally. */
3453 local_symbolic_operand (op, mode)
3455 enum machine_mode mode ATTRIBUTE_UNUSED;
3457 if (GET_CODE (op) == CONST
3458 && GET_CODE (XEXP (op, 0)) == PLUS
3459 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3460 op = XEXP (XEXP (op, 0), 0);
3462 if (GET_CODE (op) == LABEL_REF)
3465 if (GET_CODE (op) != SYMBOL_REF)
3468 /* These we've been told are local by varasm and encode_section_info
3470 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3473 /* There is, however, a not insubstantial body of code in the rest of
3474 the compiler that assumes it can just stick the results of
3475 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3476 /* ??? This is a hack. Should update the body of the compiler to
3477 always create a DECL an invoke targetm.encode_section_info. */
3478 if (strncmp (XSTR (op, 0), internal_label_prefix,
3479 internal_label_prefix_len) == 0)
3485 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3488 tls_symbolic_operand (op, mode)
3490 enum machine_mode mode ATTRIBUTE_UNUSED;
3492 const char *symbol_str;
3494 if (GET_CODE (op) != SYMBOL_REF)
3496 symbol_str = XSTR (op, 0);
3498 if (symbol_str[0] != '%')
3500 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3504 tls_symbolic_operand_1 (op, kind)
3506 enum tls_model kind;
3508 const char *symbol_str;
3510 if (GET_CODE (op) != SYMBOL_REF)
3512 symbol_str = XSTR (op, 0);
3514 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3518 global_dynamic_symbolic_operand (op, mode)
3520 enum machine_mode mode ATTRIBUTE_UNUSED;
3522 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3526 local_dynamic_symbolic_operand (op, mode)
3528 enum machine_mode mode ATTRIBUTE_UNUSED;
3530 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3534 initial_exec_symbolic_operand (op, mode)
3536 enum machine_mode mode ATTRIBUTE_UNUSED;
3538 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3542 local_exec_symbolic_operand (op, mode)
3544 enum machine_mode mode ATTRIBUTE_UNUSED;
3546 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3549 /* Test for a valid operand for a call instruction. Don't allow the
3550 arg pointer register or virtual regs since they may decay into
3551 reg + const, which the patterns can't handle. */
3554 call_insn_operand (op, mode)
3556 enum machine_mode mode ATTRIBUTE_UNUSED;
3558 /* Disallow indirect through a virtual register. This leads to
3559 compiler aborts when trying to eliminate them. */
3560 if (GET_CODE (op) == REG
3561 && (op == arg_pointer_rtx
3562 || op == frame_pointer_rtx
3563 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3564 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3567 /* Disallow `call 1234'. Due to varying assembler lameness this
3568 gets either rejected or translated to `call .+1234'. */
3569 if (GET_CODE (op) == CONST_INT)
3572 /* Explicitly allow SYMBOL_REF even if pic. */
3573 if (GET_CODE (op) == SYMBOL_REF)
3576 /* Otherwise we can allow any general_operand in the address. */
3577 return general_operand (op, Pmode);
3580 /* Test for a valid operand for a call instruction. Don't allow the
3581 arg pointer register or virtual regs since they may decay into
3582 reg + const, which the patterns can't handle. */
3585 sibcall_insn_operand (op, mode)
3587 enum machine_mode mode ATTRIBUTE_UNUSED;
3589 /* Disallow indirect through a virtual register. This leads to
3590 compiler aborts when trying to eliminate them. */
3591 if (GET_CODE (op) == REG
3592 && (op == arg_pointer_rtx
3593 || op == frame_pointer_rtx
3594 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3595 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3598 /* Explicitly allow SYMBOL_REF even if pic. */
3599 if (GET_CODE (op) == SYMBOL_REF)
3602 /* Otherwise we can only allow register operands. */
3603 return register_operand (op, Pmode);
3607 constant_call_address_operand (op, mode)
3609 enum machine_mode mode ATTRIBUTE_UNUSED;
3611 if (GET_CODE (op) == CONST
3612 && GET_CODE (XEXP (op, 0)) == PLUS
3613 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3614 op = XEXP (XEXP (op, 0), 0);
3615 return GET_CODE (op) == SYMBOL_REF;
3618 /* Match exactly zero and one. */
3621 const0_operand (op, mode)
3623 enum machine_mode mode;
3625 return op == CONST0_RTX (mode);
3629 const1_operand (op, mode)
3631 enum machine_mode mode ATTRIBUTE_UNUSED;
3633 return op == const1_rtx;
3636 /* Match 2, 4, or 8. Used for leal multiplicands. */
3639 const248_operand (op, mode)
3641 enum machine_mode mode ATTRIBUTE_UNUSED;
3643 return (GET_CODE (op) == CONST_INT
3644 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3647 /* True if this is a constant appropriate for an increment or decrement. */
3650 incdec_operand (op, mode)
3652 enum machine_mode mode ATTRIBUTE_UNUSED;
3654 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3655 registers, since carry flag is not set. */
3656 if (TARGET_PENTIUM4 && !optimize_size)
3658 return op == const1_rtx || op == constm1_rtx;
3661 /* Return nonzero if OP is acceptable as operand of DImode shift
3665 shiftdi_operand (op, mode)
3667 enum machine_mode mode ATTRIBUTE_UNUSED;
3670 return nonimmediate_operand (op, mode);
3672 return register_operand (op, mode);
3675 /* Return false if this is the stack pointer, or any other fake
3676 register eliminable to the stack pointer. Otherwise, this is
3679 This is used to prevent esp from being used as an index reg.
3680 Which would only happen in pathological cases. */
3683 reg_no_sp_operand (op, mode)
3685 enum machine_mode mode;
3688 if (GET_CODE (t) == SUBREG)
3690 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3693 return register_operand (op, mode);
3697 mmx_reg_operand (op, mode)
3699 enum machine_mode mode ATTRIBUTE_UNUSED;
3701 return MMX_REG_P (op);
3704 /* Return false if this is any eliminable register. Otherwise
3708 general_no_elim_operand (op, mode)
3710 enum machine_mode mode;
3713 if (GET_CODE (t) == SUBREG)
3715 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3716 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3717 || t == virtual_stack_dynamic_rtx)
3720 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3721 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3724 return general_operand (op, mode);
3727 /* Return false if this is any eliminable register. Otherwise
3728 register_operand or const_int. */
3731 nonmemory_no_elim_operand (op, mode)
3733 enum machine_mode mode;
3736 if (GET_CODE (t) == SUBREG)
3738 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3739 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3740 || t == virtual_stack_dynamic_rtx)
3743 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3746 /* Return false if this is any eliminable register or stack register,
3747 otherwise work like register_operand. */
3750 index_register_operand (op, mode)
3752 enum machine_mode mode;
3755 if (GET_CODE (t) == SUBREG)
3759 if (t == arg_pointer_rtx
3760 || t == frame_pointer_rtx
3761 || t == virtual_incoming_args_rtx
3762 || t == virtual_stack_vars_rtx
3763 || t == virtual_stack_dynamic_rtx
3764 || REGNO (t) == STACK_POINTER_REGNUM)
3767 return general_operand (op, mode);
3770 /* Return true if op is a Q_REGS class register. */
3773 q_regs_operand (op, mode)
3775 enum machine_mode mode;
3777 if (mode != VOIDmode && GET_MODE (op) != mode)
3779 if (GET_CODE (op) == SUBREG)
3780 op = SUBREG_REG (op);
3781 return ANY_QI_REG_P (op);
3784 /* Return true if op is an flags register. */
3787 flags_reg_operand (op, mode)
3789 enum machine_mode mode;
3791 if (mode != VOIDmode && GET_MODE (op) != mode)
3793 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3796 /* Return true if op is a NON_Q_REGS class register. */
3799 non_q_regs_operand (op, mode)
3801 enum machine_mode mode;
3803 if (mode != VOIDmode && GET_MODE (op) != mode)
3805 if (GET_CODE (op) == SUBREG)
3806 op = SUBREG_REG (op);
3807 return NON_QI_REG_P (op);
3811 zero_extended_scalar_load_operand (op, mode)
3813 enum machine_mode mode ATTRIBUTE_UNUSED;
3816 if (GET_CODE (op) != MEM)
3818 op = maybe_get_pool_constant (op);
3821 if (GET_CODE (op) != CONST_VECTOR)
3824 (GET_MODE_SIZE (GET_MODE (op)) /
3825 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3826 for (n_elts--; n_elts > 0; n_elts--)
3828 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3829 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3835 /* Return 1 when OP is operand acceptable for standard SSE move. */
3837 vector_move_operand (op, mode)
3839 enum machine_mode mode;
3841 if (nonimmediate_operand (op, mode))
3843 if (GET_MODE (op) != mode && mode != VOIDmode)
3845 return (op == CONST0_RTX (GET_MODE (op)));
3848 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3851 sse_comparison_operator (op, mode)
3853 enum machine_mode mode ATTRIBUTE_UNUSED;
3855 enum rtx_code code = GET_CODE (op);
3858 /* Operations supported directly. */
3868 /* These are equivalent to ones above in non-IEEE comparisons. */
3875 return !TARGET_IEEE_FP;
3880 /* Return 1 if OP is a valid comparison operator in valid mode. */
3882 ix86_comparison_operator (op, mode)
3884 enum machine_mode mode;
3886 enum machine_mode inmode;
3887 enum rtx_code code = GET_CODE (op);
3888 if (mode != VOIDmode && GET_MODE (op) != mode)
3890 if (GET_RTX_CLASS (code) != '<')
3892 inmode = GET_MODE (XEXP (op, 0));
3894 if (inmode == CCFPmode || inmode == CCFPUmode)
3896 enum rtx_code second_code, bypass_code;
3897 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3898 return (bypass_code == NIL && second_code == NIL);
3905 if (inmode == CCmode || inmode == CCGCmode
3906 || inmode == CCGOCmode || inmode == CCNOmode)
3909 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3910 if (inmode == CCmode)
3914 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3922 /* Return 1 if OP is a valid comparison operator testing carry flag
3925 ix86_carry_flag_operator (op, mode)
3927 enum machine_mode mode;
3929 enum machine_mode inmode;
3930 enum rtx_code code = GET_CODE (op);
3932 if (mode != VOIDmode && GET_MODE (op) != mode)
3934 if (GET_RTX_CLASS (code) != '<')
3936 inmode = GET_MODE (XEXP (op, 0));
3937 if (GET_CODE (XEXP (op, 0)) != REG
3938 || REGNO (XEXP (op, 0)) != 17
3939 || XEXP (op, 1) != const0_rtx)
3942 if (inmode == CCFPmode || inmode == CCFPUmode)
3944 enum rtx_code second_code, bypass_code;
3946 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3947 if (bypass_code != NIL || second_code != NIL)
3949 code = ix86_fp_compare_code_to_integer (code);
3951 else if (inmode != CCmode)
3956 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3959 fcmov_comparison_operator (op, mode)
3961 enum machine_mode mode;
3963 enum machine_mode inmode;
3964 enum rtx_code code = GET_CODE (op);
3966 if (mode != VOIDmode && GET_MODE (op) != mode)
3968 if (GET_RTX_CLASS (code) != '<')
3970 inmode = GET_MODE (XEXP (op, 0));
3971 if (inmode == CCFPmode || inmode == CCFPUmode)
3973 enum rtx_code second_code, bypass_code;
3975 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3976 if (bypass_code != NIL || second_code != NIL)
3978 code = ix86_fp_compare_code_to_integer (code);
3980 /* i387 supports just limited amount of conditional codes. */
3983 case LTU: case GTU: case LEU: case GEU:
3984 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3987 case ORDERED: case UNORDERED:
3995 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3998 promotable_binary_operator (op, mode)
4000 enum machine_mode mode ATTRIBUTE_UNUSED;
4002 switch (GET_CODE (op))
4005 /* Modern CPUs have same latency for HImode and SImode multiply,
4006 but 386 and 486 do HImode multiply faster. */
4007 return ix86_cpu > PROCESSOR_I486;
4019 /* Nearly general operand, but accept any const_double, since we wish
4020 to be able to drop them into memory rather than have them get pulled
4024 cmp_fp_expander_operand (op, mode)
4026 enum machine_mode mode;
4028 if (mode != VOIDmode && mode != GET_MODE (op))
4030 if (GET_CODE (op) == CONST_DOUBLE)
4032 return general_operand (op, mode);
4035 /* Match an SI or HImode register for a zero_extract. */
4038 ext_register_operand (op, mode)
4040 enum machine_mode mode ATTRIBUTE_UNUSED;
4043 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4044 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4047 if (!register_operand (op, VOIDmode))
4050 /* Be careful to accept only registers having upper parts. */
4051 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4052 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4055 /* Return 1 if this is a valid binary floating-point operation.
4056 OP is the expression matched, and MODE is its mode. */
4059 binary_fp_operator (op, mode)
4061 enum machine_mode mode;
4063 if (mode != VOIDmode && mode != GET_MODE (op))
4066 switch (GET_CODE (op))
4072 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4080 mult_operator (op, mode)
4082 enum machine_mode mode ATTRIBUTE_UNUSED;
4084 return GET_CODE (op) == MULT;
4088 div_operator (op, mode)
4090 enum machine_mode mode ATTRIBUTE_UNUSED;
4092 return GET_CODE (op) == DIV;
4096 arith_or_logical_operator (op, mode)
4098 enum machine_mode mode;
4100 return ((mode == VOIDmode || GET_MODE (op) == mode)
4101 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4102 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4105 /* Returns 1 if OP is memory operand with a displacement. */
4108 memory_displacement_operand (op, mode)
4110 enum machine_mode mode;
4112 struct ix86_address parts;
4114 if (! memory_operand (op, mode))
4117 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4120 return parts.disp != NULL_RTX;
4123 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4124 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4126 ??? It seems likely that this will only work because cmpsi is an
4127 expander, and no actual insns use this. */
4130 cmpsi_operand (op, mode)
4132 enum machine_mode mode;
4134 if (nonimmediate_operand (op, mode))
4137 if (GET_CODE (op) == AND
4138 && GET_MODE (op) == SImode
4139 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4140 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4141 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4142 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4143 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4144 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4150 /* Returns 1 if OP is memory operand that can not be represented by the
4154 long_memory_operand (op, mode)
4156 enum machine_mode mode;
4158 if (! memory_operand (op, mode))
4161 return memory_address_length (op) != 0;
4164 /* Return nonzero if the rtx is known aligned. */
4167 aligned_operand (op, mode)
4169 enum machine_mode mode;
4171 struct ix86_address parts;
4173 if (!general_operand (op, mode))
4176 /* Registers and immediate operands are always "aligned". */
4177 if (GET_CODE (op) != MEM)
4180 /* Don't even try to do any aligned optimizations with volatiles. */
4181 if (MEM_VOLATILE_P (op))
4186 /* Pushes and pops are only valid on the stack pointer. */
4187 if (GET_CODE (op) == PRE_DEC
4188 || GET_CODE (op) == POST_INC)
4191 /* Decode the address. */
4192 if (! ix86_decompose_address (op, &parts))
4195 if (parts.base && GET_CODE (parts.base) == SUBREG)
4196 parts.base = SUBREG_REG (parts.base);
4197 if (parts.index && GET_CODE (parts.index) == SUBREG)
4198 parts.index = SUBREG_REG (parts.index);
4200 /* Look for some component that isn't known to be aligned. */
4204 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4209 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4214 if (GET_CODE (parts.disp) != CONST_INT
4215 || (INTVAL (parts.disp) & 3) != 0)
4219 /* Didn't find one -- this must be an aligned address. */
4223 /* Initialize the table of extra 80387 mathematical constants. */
4226 init_ext_80387_constants ()
4228 static const char * cst[5] =
4230 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4231 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4232 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4233 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4234 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4238 for (i = 0; i < 5; i++)
4240 real_from_string (&ext_80387_constants_table[i], cst[i]);
4241 /* Ensure each constant is rounded to XFmode precision. */
4242 real_convert (&ext_80387_constants_table[i], XFmode,
4243 &ext_80387_constants_table[i]);
4246 ext_80387_constants_init = 1;
4249 /* Return true if the constant is something that can be loaded with
4250 a special instruction. */
4253 standard_80387_constant_p (x)
4256 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4259 if (x == CONST0_RTX (GET_MODE (x)))
4261 if (x == CONST1_RTX (GET_MODE (x)))
4264 /* For XFmode constants, try to find a special 80387 instruction on
4265 those CPUs that benefit from them. */
4266 if (GET_MODE (x) == XFmode
4267 && x86_ext_80387_constants & CPUMASK)
4272 if (! ext_80387_constants_init)
4273 init_ext_80387_constants ();
4275 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4276 for (i = 0; i < 5; i++)
4277 if (real_identical (&r, &ext_80387_constants_table[i]))
4284 /* Return the opcode of the special instruction to be used to load
4288 standard_80387_constant_opcode (x)
4291 switch (standard_80387_constant_p (x))
4311 /* Return the CONST_DOUBLE representing the 80387 constant that is
4312 loaded by the specified special instruction. The argument IDX
4313 matches the return value from standard_80387_constant_p. */
4316 standard_80387_constant_rtx (idx)
4321 if (! ext_80387_constants_init)
4322 init_ext_80387_constants ();
4338 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], XFmode);
4341 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4344 standard_sse_constant_p (x)
4347 if (x == const0_rtx)
4349 return (x == CONST0_RTX (GET_MODE (x)));
4352 /* Returns 1 if OP contains a symbol reference */
4355 symbolic_reference_mentioned_p (op)
4358 register const char *fmt;
4361 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4364 fmt = GET_RTX_FORMAT (GET_CODE (op));
4365 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4371 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4372 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4376 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4383 /* Return 1 if it is appropriate to emit `ret' instructions in the
4384 body of a function. Do this only if the epilogue is simple, needing a
4385 couple of insns. Prior to reloading, we can't tell how many registers
4386 must be saved, so return 0 then. Return 0 if there is no frame
4387 marker to de-allocate.
4389 If NON_SAVING_SETJMP is defined and true, then it is not possible
4390 for the epilogue to be simple, so return 0. This is a special case
4391 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4392 until final, but jump_optimize may need to know sooner if a
4396 ix86_can_use_return_insn_p ()
4398 struct ix86_frame frame;
4400 #ifdef NON_SAVING_SETJMP
4401 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4405 if (! reload_completed || frame_pointer_needed)
4408 /* Don't allow more than 32 pop, since that's all we can do
4409 with one instruction. */
4410 if (current_function_pops_args
4411 && current_function_args_size >= 32768)
4414 ix86_compute_frame_layout (&frame);
4415 return frame.to_allocate == 0 && frame.nregs == 0;
4418 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4420 x86_64_sign_extended_value (value)
4423 switch (GET_CODE (value))
4425 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4426 to be at least 32 and this all acceptable constants are
4427 represented as CONST_INT. */
4429 if (HOST_BITS_PER_WIDE_INT == 32)
4433 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4434 return trunc_int_for_mode (val, SImode) == val;
4438 /* For certain code models, the symbolic references are known to fit.
4439 in CM_SMALL_PIC model we know it fits if it is local to the shared
4440 library. Don't count TLS SYMBOL_REFs here, since they should fit
4441 only if inside of UNSPEC handled below. */
4443 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4445 /* For certain code models, the code is near as well. */
4447 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4448 || ix86_cmodel == CM_KERNEL);
4450 /* We also may accept the offsetted memory references in certain special
4453 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4454 switch (XINT (XEXP (value, 0), 1))
4456 case UNSPEC_GOTPCREL:
4458 case UNSPEC_GOTNTPOFF:
4464 if (GET_CODE (XEXP (value, 0)) == PLUS)
4466 rtx op1 = XEXP (XEXP (value, 0), 0);
4467 rtx op2 = XEXP (XEXP (value, 0), 1);
4468 HOST_WIDE_INT offset;
4470 if (ix86_cmodel == CM_LARGE)
4472 if (GET_CODE (op2) != CONST_INT)
4474 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4475 switch (GET_CODE (op1))
4478 /* For CM_SMALL assume that latest object is 16MB before
4479 end of 31bits boundary. We may also accept pretty
4480 large negative constants knowing that all objects are
4481 in the positive half of address space. */
4482 if (ix86_cmodel == CM_SMALL
4483 && offset < 16*1024*1024
4484 && trunc_int_for_mode (offset, SImode) == offset)
4486 /* For CM_KERNEL we know that all object resist in the
4487 negative half of 32bits address space. We may not
4488 accept negative offsets, since they may be just off
4489 and we may accept pretty large positive ones. */
4490 if (ix86_cmodel == CM_KERNEL
4492 && trunc_int_for_mode (offset, SImode) == offset)
4496 /* These conditions are similar to SYMBOL_REF ones, just the
4497 constraints for code models differ. */
4498 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4499 && offset < 16*1024*1024
4500 && trunc_int_for_mode (offset, SImode) == offset)
4502 if (ix86_cmodel == CM_KERNEL
4504 && trunc_int_for_mode (offset, SImode) == offset)
4508 switch (XINT (op1, 1))
4513 && trunc_int_for_mode (offset, SImode) == offset)
4527 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4529 x86_64_zero_extended_value (value)
4532 switch (GET_CODE (value))
4535 if (HOST_BITS_PER_WIDE_INT == 32)
4536 return (GET_MODE (value) == VOIDmode
4537 && !CONST_DOUBLE_HIGH (value));
4541 if (HOST_BITS_PER_WIDE_INT == 32)
4542 return INTVAL (value) >= 0;
4544 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4547 /* For certain code models, the symbolic references are known to fit. */
4549 return ix86_cmodel == CM_SMALL;
4551 /* For certain code models, the code is near as well. */
4553 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4555 /* We also may accept the offsetted memory references in certain special
4558 if (GET_CODE (XEXP (value, 0)) == PLUS)
4560 rtx op1 = XEXP (XEXP (value, 0), 0);
4561 rtx op2 = XEXP (XEXP (value, 0), 1);
4563 if (ix86_cmodel == CM_LARGE)
4565 switch (GET_CODE (op1))
4569 /* For small code model we may accept pretty large positive
4570 offsets, since one bit is available for free. Negative
4571 offsets are limited by the size of NULL pointer area
4572 specified by the ABI. */
4573 if (ix86_cmodel == CM_SMALL
4574 && GET_CODE (op2) == CONST_INT
4575 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4576 && (trunc_int_for_mode (INTVAL (op2), SImode)
4579 /* ??? For the kernel, we may accept adjustment of
4580 -0x10000000, since we know that it will just convert
4581 negative address space to positive, but perhaps this
4582 is not worthwhile. */
4585 /* These conditions are similar to SYMBOL_REF ones, just the
4586 constraints for code models differ. */
4587 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4588 && GET_CODE (op2) == CONST_INT
4589 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4590 && (trunc_int_for_mode (INTVAL (op2), SImode)
4604 /* Value should be nonzero if functions must have frame pointers.
4605 Zero means the frame pointer need not be set up (and parms may
4606 be accessed via the stack pointer) in functions that seem suitable. */
4609 ix86_frame_pointer_required ()
4611 /* If we accessed previous frames, then the generated code expects
4612 to be able to access the saved ebp value in our frame. */
4613 if (cfun->machine->accesses_prev_frame)
4616 /* Several x86 os'es need a frame pointer for other reasons,
4617 usually pertaining to setjmp. */
4618 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4621 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4622 the frame pointer by default. Turn it back on now if we've not
4623 got a leaf function. */
4624 if (TARGET_OMIT_LEAF_FRAME_POINTER
4625 && (!current_function_is_leaf))
4628 if (current_function_profile)
4634 /* Record that the current function accesses previous call frames. */
4637 ix86_setup_frame_addresses ()
4639 cfun->machine->accesses_prev_frame = 1;
4642 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4643 # define USE_HIDDEN_LINKONCE 1
4645 # define USE_HIDDEN_LINKONCE 0
4648 static int pic_labels_used;
4650 /* Fills in the label name that should be used for a pc thunk for
4651 the given register. */
4654 get_pc_thunk_name (name, regno)
4658 if (USE_HIDDEN_LINKONCE)
4659 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4661 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4665 /* This function generates code for -fpic that loads %ebx with
4666 the return address of the caller and then returns. */
4669 ix86_asm_file_end (file)
4675 for (regno = 0; regno < 8; ++regno)
4679 if (! ((pic_labels_used >> regno) & 1))
4682 get_pc_thunk_name (name, regno);
4684 if (USE_HIDDEN_LINKONCE)
4688 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4690 TREE_PUBLIC (decl) = 1;
4691 TREE_STATIC (decl) = 1;
4692 DECL_ONE_ONLY (decl) = 1;
4694 (*targetm.asm_out.unique_section) (decl, 0);
4695 named_section (decl, NULL, 0);
4697 (*targetm.asm_out.globalize_label) (file, name);
4698 fputs ("\t.hidden\t", file);
4699 assemble_name (file, name);
4701 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4706 ASM_OUTPUT_LABEL (file, name);
4709 xops[0] = gen_rtx_REG (SImode, regno);
4710 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4711 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4712 output_asm_insn ("ret", xops);
4716 /* Emit code for the SET_GOT patterns. */
4719 output_set_got (dest)
4725 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4727 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4729 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4732 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4734 output_asm_insn ("call\t%a2", xops);
4737 /* Output the "canonical" label name ("Lxx$pb") here too. This
4738 is what will be referred to by the Mach-O PIC subsystem. */
4739 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4741 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4742 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4745 output_asm_insn ("pop{l}\t%0", xops);
4750 get_pc_thunk_name (name, REGNO (dest));
4751 pic_labels_used |= 1 << REGNO (dest);
4753 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4754 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4755 output_asm_insn ("call\t%X2", xops);
4758 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4759 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4760 else if (!TARGET_MACHO)
4761 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4766 /* Generate an "push" pattern for input ARG. */
4772 return gen_rtx_SET (VOIDmode,
4774 gen_rtx_PRE_DEC (Pmode,
4775 stack_pointer_rtx)),
4779 /* Return >= 0 if there is an unused call-clobbered register available
4780    for the entire function.  */
/* Only attempted for leaf, non-profiled functions; scans the three
   call-clobbered integer registers (hard regnos 2..0) for one that is
   never live, so it can hold the PIC base without a save/restore.  */
4783 ix86_select_alt_pic_regnum ()
4785   if (current_function_is_leaf && !current_function_profile)
4788       for (i = 2; i >= 0; --i)
4789 	if (!regs_ever_live[i])
/* No suitable register found (or function is not a leaf).  */
4793   return INVALID_REGNUM;
4796 /* Return 1 if we need to save REGNO.  */
/* MAYBE_EH_RETURN selects whether the EH_RETURN data registers count
   as needing a save (true in the prologue / eh_return epilogue).  */
4798 ix86_save_reg (regno, maybe_eh_return)
4800      int maybe_eh_return;
/* The PIC register must be saved when it is actually used (live,
   profiled, or eh_return) -- unless an alternate unused call-clobbered
   register can hold the PIC base instead.  */
4802   if (pic_offset_table_rtx
4803       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4804       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4805 	  || current_function_profile
4806 	  || current_function_calls_eh_return))
4808       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* For eh_return, the EH data registers are treated as saved so the
   unwinder can restore them (loop body partly outside this excerpt).  */
4813   if (current_function_calls_eh_return && maybe_eh_return)
4818 	  unsigned test = EH_RETURN_DATA_REGNO (i);
4819 	  if (test == INVALID_REGNUM)
/* Default rule: save call-saved, non-fixed registers that are ever
   live, excluding the hard frame pointer when it is in use.  */
4826   return (regs_ever_live[regno]
4827 	  && !call_used_regs[regno]
4828 	  && !fixed_regs[regno]
4829 	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4832 /* Return number of registers to be saved on the stack.  */
/* Counts hard registers for which ix86_save_reg (regno, true) holds;
   the accumulator declaration/return are outside this excerpt.  */
4840   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4841     if (ix86_save_reg (regno, true))
4846 /* Return the offset between two registers, one to be eliminated, and the other
4847    its replacement, at the start of a routine.  */
/* All offsets are derived from the freshly computed frame layout.  */
4850 ix86_initial_elimination_offset (from, to)
4854   struct ix86_frame frame;
4855   ix86_compute_frame_layout (&frame);
4857   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4858     return frame.hard_frame_pointer_offset;
4859   else if (from == FRAME_POINTER_REGNUM
4860 	   && to == HARD_FRAME_POINTER_REGNUM)
4861     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases eliminate to the stack pointer; anything else is
   invalid (the failure branch is not visible in this excerpt).  */
4864       if (to != STACK_POINTER_REGNUM)
4866       else if (from == ARG_POINTER_REGNUM)
4867 	return frame.stack_pointer_offset;
4868       else if (from != FRAME_POINTER_REGNUM)
4871 	return frame.stack_pointer_offset - frame.frame_pointer_offset;
4875 /* Fill structure ix86_frame about frame of currently computed function.  */
/* Layout order (growing offsets from the return address):
     saved registers -> varargs save area -> padding1 (local alignment)
     -> locals -> outgoing args -> padding2 (call alignment),
   with the 64-bit red zone subtracted at the end when usable.  */
4878 ix86_compute_frame_layout (frame)
4879      struct ix86_frame *frame;
4881   HOST_WIDE_INT total_size;
4882   int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4884   int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4885   HOST_WIDE_INT size = get_frame_size ();
4887   frame->nregs = ix86_nsaved_regs ();
4890   /* Skip return address and saved base pointer.  */
4891   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4893   frame->hard_frame_pointer_offset = offset;
4895   /* Do some sanity checking of stack_alignment_needed and
4896      preferred_alignment, since i386 port is the only using those features
4897      that may break easily.  */
/* The bodies of these checks (presumably aborts) are not visible here.  */
4899   if (size && !stack_alignment_needed)
4901   if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4903   if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4905   if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4908   if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4909     stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4911   /* Register save area */
4912   offset += frame->nregs * UNITS_PER_WORD;
/* Varargs register save area (x86-64 only).  */
4915   if (ix86_save_varrargs_registers)
4917       offset += X86_64_VARARGS_SIZE;
4918       frame->va_arg_size = X86_64_VARARGS_SIZE;
4921     frame->va_arg_size = 0;
4923   /* Align start of frame for local function.  */
/* Round OFFSET up to stack_alignment_needed; padding1 is the slack.  */
4924   frame->padding1 = ((offset + stack_alignment_needed - 1)
4925 		     & -stack_alignment_needed) - offset;
4927   offset += frame->padding1;
4929   /* Frame pointer points here.  */
4930   frame->frame_pointer_offset = offset;
4934   /* Add outgoing arguments area.  Can be skipped if we eliminated
4935      all the function calls as dead code.  */
4936   if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4938       offset += current_function_outgoing_args_size;
4939       frame->outgoing_arguments_size = current_function_outgoing_args_size;
4942     frame->outgoing_arguments_size = 0;
4944   /* Align stack boundary.  Only needed if we're calling another function
/* Round up to the preferred (call) boundary for non-leaf functions.  */
4946   if (!current_function_is_leaf || current_function_calls_alloca)
4947     frame->padding2 = ((offset + preferred_alignment - 1)
4948 		       & -preferred_alignment) - offset;
4950     frame->padding2 = 0;
4952   offset += frame->padding2;
4954   /* We've reached end of stack frame.  */
4955   frame->stack_pointer_offset = offset;
4957   /* Size prologue needs to allocate.  */
4958   frame->to_allocate =
4959     (size + frame->padding1 + frame->padding2
4960      + frame->outgoing_arguments_size + frame->va_arg_size);
/* x86-64 red zone: up to RED_ZONE_SIZE - RED_ZONE_RESERVE bytes below
   the stack pointer may be used without adjusting %rsp, but only in
   leaf functions whose stack pointer never changes.  */
4962   if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4963       && current_function_is_leaf)
4965       frame->red_zone_size = frame->to_allocate;
4966       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4967 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4970     frame->red_zone_size = 0;
4971   frame->to_allocate -= frame->red_zone_size;
4972   frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout.  NOTE(review): presumably guarded
   by a debug condition on a line not visible in this excerpt.  */
4974   fprintf (stderr, "nregs: %i\n", frame->nregs);
4975   fprintf (stderr, "size: %i\n", size);
4976   fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4977   fprintf (stderr, "padding1: %i\n", frame->padding1);
4978   fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4979   fprintf (stderr, "padding2: %i\n", frame->padding2);
4980   fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4981   fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4982   fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4983   fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4984 	   frame->hard_frame_pointer_offset);
4985   fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4989 /* Emit code to save registers in the prologue.  */
/* Emits one push per register that ix86_save_reg says must be saved,
   from the highest hard regno down; each push is marked frame-related
   for DWARF unwind info.  */
4992 ix86_emit_save_regs ()
4997   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4998     if (ix86_save_reg (regno, true))
5000 	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5001 	RTX_FRAME_RELATED_P (insn) = 1;
5005 /* Emit code to save registers using MOV insns.  First register
5006    is saved to POINTER + OFFSET.  */
/* Alternative to push-based saving: stores each to-be-saved register
   at successive word offsets from POINTER.  Each store is marked
   frame-related for unwind info.  */
5008 ix86_emit_save_regs_using_mov (pointer, offset)
5010      HOST_WIDE_INT offset;
5015   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5016     if (ix86_save_reg (regno, true))
5018 	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5020 			       gen_rtx_REG (Pmode, regno));
5021 	RTX_FRAME_RELATED_P (insn) = 1;
5022 	offset += UNITS_PER_WORD;
5026 /* Expand the prologue into a bunch of separate insns.  */
/* Overall shape: choose push vs mov register saves, set up the frame
   pointer if needed, allocate the frame (directly or via _alloca stack
   probing), then load the PIC register when it is required.  */
5029 ix86_expand_prologue ()
5033   struct ix86_frame frame;
5035   HOST_WIDE_INT allocate;
5037   ix86_compute_frame_layout (&frame);
5040     int count = frame.nregs;
5042     /* The fast prologue uses move instead of push to save registers.  This
5043        is significantly longer, but also executes faster as modern hardware
5044        can execute the moves in parallel, but can't do that for push/pop.
5046        Be careful about choosing what prologue to emit:  When function takes
5047        many instructions to execute we may use slow version as well as in
5048        case function is known to be outside hot spot (this is known with
5049        feedback only).  Weight the size of function by number of registers
5050        to save as it is cheap to use one or two push instructions but very
5051        slow to use many of them.  */
5053       count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5054     if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5055 	|| (flag_branch_probabilities
5056 	    && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5057       use_fast_prologue_epilogue = 0;
5059       use_fast_prologue_epilogue = !expensive_function_p (count);
5060     if (TARGET_PROLOGUE_USING_MOVE)
5061       use_mov = use_fast_prologue_epilogue;
5064   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
5065      slower on all targets.  Also sdb doesn't like it.  */
/* Standard frame-pointer setup: push %ebp; mov %esp, %ebp.  */
5067   if (frame_pointer_needed)
5069       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5070       RTX_FRAME_RELATED_P (insn) = 1;
5072       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5073       RTX_FRAME_RELATED_P (insn) = 1;
5076   allocate = frame.to_allocate;
5077   /* In case we are dealing only with single register and empty frame,
5078      push is equivalent of the mov+add sequence.  */
5079   if (allocate == 0 && frame.nregs <= 1)
/* Push-based saving folds the register-save area into the allocation.  */
5083 	ix86_emit_save_regs ();
5085     allocate += frame.nregs * UNITS_PER_WORD;
/* Small/unchecked allocations: a single stack-pointer adjustment.  */
5089   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5091       insn = emit_insn (gen_pro_epilogue_adjust_stack
5092 			(stack_pointer_rtx, stack_pointer_rtx,
5093 			 GEN_INT (-allocate)));
5094       RTX_FRAME_RELATED_P (insn) = 1;
5098       /* ??? Is this only valid for Win32?  */
/* Large allocation with stack probing: call _alloca with the size in
   %eax and record the register use on the call.  */
5105       arg0 = gen_rtx_REG (SImode, 0);
5106       emit_move_insn (arg0, GEN_INT (allocate));
5108       sym = gen_rtx_MEM (FUNCTION_MODE,
5109 			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
5110       insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5112       CALL_INSN_FUNCTION_USAGE (insn)
5113 	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5114 			     CALL_INSN_FUNCTION_USAGE (insn));
5116       /* Don't allow scheduling pass to move insns across __alloca
5118       emit_insn (gen_blockage (const0_rtx));
/* Mov-based saving: address the save area from %esp when possible,
   otherwise at negative offsets from %ebp.  */
5122       if (!frame_pointer_needed || !frame.to_allocate)
5123 	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5125 	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5126 				       -frame.nregs * UNITS_PER_WORD);
5129 #ifdef SUBTARGET_PROLOGUE
/* Decide whether the PIC register must be initialized, preferring an
   unused call-clobbered register when one is available.  */
5133   pic_reg_used = false;
5134   if (pic_offset_table_rtx
5135       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5136 	  || current_function_profile))
5138       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5140       if (alt_pic_reg_used != INVALID_REGNUM)
5141 	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5143       pic_reg_used = true;
5148       insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5150       /* Even with accurate pre-reload life analysis, we can wind up
5151 	 deleting all references to the pic register after reload.
5152 	 Consider if cross-jumping unifies two sides of a branch
5153 	 controlled by a comparison vs the only read from a global.
5154 	 In which case, allow the set_got to be deleted, though we're
5155 	 too late to do anything about the ebx save in the prologue.  */
5156       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5159   /* Prevent function calls from be scheduled before the call to mcount.
5160      In the pic_reg_used case, make sure that the got load isn't deleted.  */
5161   if (current_function_profile)
5162     emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5165 /* Emit code to restore saved registers using MOV insns.  First register
5166    is restored from POINTER + OFFSET.  */
/* Mirror of ix86_emit_save_regs_using_mov: loads each saved register
   from successive word offsets.  MAYBE_EH_RETURN is forwarded to
   ix86_save_reg so EH data registers are only restored when wanted.  */
5168 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
5171      int maybe_eh_return;
5175   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5176     if (ix86_save_reg (regno, maybe_eh_return))
5178 	emit_move_insn (gen_rtx_REG (Pmode, regno),
5179 			adjust_address (gen_rtx_MEM (Pmode, pointer),
5181 	offset += UNITS_PER_WORD;
5185 /* Restore function stack, frame, and registers.  */
/* STYLE appears to distinguish the epilogue variants (style == 2 is
   the eh_return path; a sibcall variant is mentioned near the end).
   Two strategies: restore registers with moves then fix the stack in
   one step (often enabling LEAVE), or deallocate first and pop.  */
5188 ix86_expand_epilogue (style)
/* The stack pointer is a usable base unless the frame pointer is in
   use and %esp may have moved (e.g. alloca).  */
5192   int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5193   struct ix86_frame frame;
5194   HOST_WIDE_INT offset;
5196   ix86_compute_frame_layout (&frame);
5198   /* Calculate start of saved registers relative to ebp.  Special care
5199      must be taken for the normal return case of a function using
5200      eh_return: the eax and edx registers are marked as saved, but not
5201      restored along this path.  */
5202   offset = frame.nregs;
5203   if (current_function_calls_eh_return && style != 2)
5205   offset *= -UNITS_PER_WORD;
5207   /* If we're only restoring one register and sp is not valid then
5208      using a move instruction to restore the register since it's
5209      less work than reloading sp and popping the register.
5211      The default code result in stack adjustment using add/lea instruction,
5212      while this code results in LEAVE instruction (or discrete equivalent),
5213      so it is profitable in some other cases as well.  Especially when there
5214      are no registers to restore.  We also use this code when TARGET_USE_LEAVE
5215      and there is exactly one register to pop.  This heuristic may need some
5216      tuning in future.  */
5217   if ((!sp_valid && frame.nregs <= 1)
5218       || (TARGET_EPILOGUE_USING_MOVE
5219 	  && use_fast_prologue_epilogue
5220 	  && (frame.nregs > 1 || frame.to_allocate))
5221       || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5222       || (frame_pointer_needed && TARGET_USE_LEAVE
5223 	  && use_fast_prologue_epilogue && frame.nregs == 1)
5224       || current_function_calls_eh_return)
5226       /* Restore registers.  We can use ebp or esp to address the memory
5227 	 locations.  If both are available, default to ebp, since offsets
5228 	 are known to be small.  Only exception is esp pointing directly to the
5229 	 end of block of saved registers, where we may simplify addressing
5232       if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5233 	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5234 					  frame.to_allocate, style == 2);
5236 	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5237 					  offset, style == 2);
5239       /* eh_return epilogues need %ecx added to the stack pointer.  */
5242 	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
/* With a frame pointer: recompute the adjusted stack slot address,
   reload %ebp from its save slot, then move %esp past the adjustment.  */
5244 	  if (frame_pointer_needed)
5246 	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5247 	      tmp = plus_constant (tmp, UNITS_PER_WORD);
5248 	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5250 	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5251 	      emit_move_insn (hard_frame_pointer_rtx, tmp);
5253 	      emit_insn (gen_pro_epilogue_adjust_stack
5254 			 (stack_pointer_rtx, sa, const0_rtx));
/* Without a frame pointer: add the whole frame plus the EH stack
   adjustment directly to %esp.  */
5258 	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5259 	      tmp = plus_constant (tmp, (frame.to_allocate
5260 					 + frame.nregs * UNITS_PER_WORD));
5261 	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5264       else if (!frame_pointer_needed)
5265 	emit_insn (gen_pro_epilogue_adjust_stack
5266 		   (stack_pointer_rtx, stack_pointer_rtx,
5267 		    GEN_INT (frame.to_allocate
5268 			     + frame.nregs * UNITS_PER_WORD)));
5269       /* If not an i386, mov & pop is faster than "leave".  */
5270       else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
5271 	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
/* Discrete LEAVE equivalent: point %esp at the saved %ebp, then pop it.  */
5274 	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5275 						    hard_frame_pointer_rtx,
5278 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5280 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5285       /* First step is to deallocate the stack frame so that we can
5286 	 pop the registers.  */
5289       if (!frame_pointer_needed)
5291 	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5292 						    hard_frame_pointer_rtx,
5295       else if (frame.to_allocate)
5296 	emit_insn (gen_pro_epilogue_adjust_stack
5297 		   (stack_pointer_rtx, stack_pointer_rtx,
5298 		    GEN_INT (frame.to_allocate)));
/* Pop each saved register (eh data regs excluded: maybe_eh_return=0).  */
5300       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5301 	if (ix86_save_reg (regno, false))
5304 	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5306 	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5308       if (frame_pointer_needed)
5310 	  /* Leave results in shorter dependency chains on CPUs that are
5311 	     able to grok it fast.  */
5312 	  if (TARGET_USE_LEAVE)
5313 	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5314 	  else if (TARGET_64BIT)
5315 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5317 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5321   /* Sibcall epilogues don't want a return instruction.  */
/* Callee-pop calling conventions ("stdcall"/"pascal"): emit ret $N,
   or an indirect-return workaround when N does not fit in 16 bits.  */
5325   if (current_function_pops_args && current_function_args_size)
5327       rtx popc = GEN_INT (current_function_pops_args);
5329       /* i386 can only pop 64K bytes.  If asked to pop more, pop
5330 	 return address, do explicit add, and jump indirectly to the
5333       if (current_function_pops_args >= 65536)
5335 	  rtx ecx = gen_rtx_REG (SImode, 2);
5337 	  /* There are is no "pascal" calling convention in 64bit ABI.  */
5341 	  emit_insn (gen_popsi1 (ecx));
5342 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5343 	  emit_jump_insn (gen_return_indirect_internal (ecx));
5346 	emit_jump_insn (gen_return_pop_internal (popc));
5349     emit_jump_insn (gen_return_internal ());
5352 /* Reset from the function's potential modifications.  */
/* The prologue may have retargeted pic_offset_table_rtx to an
   alternate register; restore the canonical PIC register number so
   the next function starts from a clean state.  */
5355 ix86_output_function_epilogue (file, size)
5356      FILE *file ATTRIBUTE_UNUSED;
5357      HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5359   if (pic_offset_table_rtx)
5360     REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5363 /* Extract the parts of an RTL expression that is a valid memory address
5364    for an instruction.  Return 0 if the structure of the address is
5365    grossly off.  Return -1 if the address contains ASHIFT, so it is not
5366    strictly valid, but still used for computing length of lea instruction.
/* Decomposes ADDR into OUT's base/index/displacement/scale fields,
   then applies several i386 encoding special cases.  */
5370 ix86_decompose_address (addr, out)
5372      struct ix86_address *out;
5374   rtx base = NULL_RTX;
5375   rtx index = NULL_RTX;
5376   rtx disp = NULL_RTX;
5377   HOST_WIDE_INT scale = 1;
5378   rtx scale_rtx = NULL_RTX;
/* Simple register: it is the base.  */
5381   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5383   else if (GET_CODE (addr) == PLUS)
5385       rtx op0 = XEXP (addr, 0);
5386       rtx op1 = XEXP (addr, 1);
5387       enum rtx_code code0 = GET_CODE (op0);
5388       enum rtx_code code1 = GET_CODE (op1);
5390       if (code0 == REG || code0 == SUBREG)
5392 	  if (code1 == REG || code1 == SUBREG)
5393 	    index = op0, base = op1;	/* index + base */
5395 	    base = op0, disp = op1;	/* base + displacement */
5397       else if (code0 == MULT)
5399 	  index = XEXP (op0, 0);
5400 	  scale_rtx = XEXP (op0, 1);
5401 	  if (code1 == REG || code1 == SUBREG)
5402 	    base = op1;			/* index*scale + base */
5404 	    disp = op1;			/* index*scale + disp */
5406       else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5408 	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
5409 	  scale_rtx = XEXP (XEXP (op0, 0), 1);
5410 	  base = XEXP (op0, 1);
5413       else if (code0 == PLUS)
5415 	  index = XEXP (op0, 0);	/* index + base + disp */
5416 	  base = XEXP (op0, 1);
5422   else if (GET_CODE (addr) == MULT)
5424       index = XEXP (addr, 0);		/* index*scale */
5425       scale_rtx = XEXP (addr, 1);
5427   else if (GET_CODE (addr) == ASHIFT)
5431       /* We're called for lea too, which implements ashift on occasion.  */
5432       index = XEXP (addr, 0);
5433       tmp = XEXP (addr, 1);
5434       if (GET_CODE (tmp) != CONST_INT)
/* Shift count 0..3 maps to scale 1/2/4/8; larger counts are rejected
   (failure return is on a line not visible here).  */
5436       scale = INTVAL (tmp);
5437       if ((unsigned HOST_WIDE_INT) scale > 3)
5443     disp = addr;			/* displacement */
5445   /* Extract the integral value of scale.  */
5448       if (GET_CODE (scale_rtx) != CONST_INT)
5450       scale = INTVAL (scale_rtx);
5453   /* Allow arg pointer and stack pointer as index if there is not scaling */
/* %esp (and the soft pointers that become it) cannot be an index in
   the SIB byte; with scale 1 the roles of base and index can be
   swapped (the swap itself is outside this excerpt).  */
5454   if (base && index && scale == 1
5455       && (index == arg_pointer_rtx || index == frame_pointer_rtx
5456           || index == stack_pointer_rtx))
5463   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
5464   if ((base == hard_frame_pointer_rtx
5465        || base == frame_pointer_rtx
5466        || base == arg_pointer_rtx) && !disp)
5469   /* Special case: on K6, [%esi] makes the instruction vector decoded.
5470      Avoid this by transforming to [%esi+0].  */
5471   if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5472       && base && !index && !disp
5474       && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5477   /* Special case: encode reg+reg instead of reg*2.  */
5478   if (!base && index && scale && scale == 2)
5479     base = index, scale = 1;
5481   /* Special case: scaling cannot be encoded without base or displacement.  */
5482   if (!base && !disp && index && scale != 1)
5493 /* Return cost of the memory address x.
5494    For i386, it is better to use a complex address than let gcc copy
5495    the address into a reg and make a new pseudo.  But not if the address
5496    requires to two regs - that would mean more pseudos with longer
/* Heuristic cost: reward addresses that fold a displacement, penalize
   ones that tie up two registers, and penalize K6 ModRM patterns that
   defeat its predecode.  Cost accumulator lines are partly outside
   this excerpt.  */
5499 ix86_address_cost (x)
5502   struct ix86_address parts;
5505   if (!ix86_decompose_address (x, &parts))
/* Strip SUBREGs so register checks below see the inner hard/pseudo reg.  */
5508   if (parts.base && GET_CODE (parts.base) == SUBREG)
5509     parts.base = SUBREG_REG (parts.base);
5510   if (parts.index && GET_CODE (parts.index) == SUBREG)
5511     parts.index = SUBREG_REG (parts.index);
5513   /* More complex memory references are better.  */
5514   if (parts.disp && parts.disp != const0_rtx)
5517   /* Attempt to minimize number of registers in the address.  */
/* Pseudo (not-yet-allocated) base/index registers count against the
   address since each will need a hard register.  */
5519       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5521 	  && (!REG_P (parts.index)
5522 	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5526       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5528       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5529       && parts.base != parts.index)
5532   /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5533      since it's predecode logic can't detect the length of instructions
5534      and it degenerates to vector decoded.  Increase cost of such
5535      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
5536      to split such addresses or even refuse such addresses at all.
5538      Following addressing modes are affected:
5543      The first and last case  may be avoidable by explicitly coding the zero in
5544      memory address, but I don't have AMD-K6 machine handy to check this
5548       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5549 	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5550 	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5556 /* If X is a machine specific address (i.e. a symbol or label being
5557    referenced as a displacement from the GOT implemented using an
5558    UNSPEC), then return the base term.  Otherwise return X.  */
/* First branch (visible guard: CONST) unwraps a GOTPCREL unspec,
   optionally skipping a constant addend; the later branch falls back
   to ix86_delegitimize_address.  NOTE(review): the TARGET_64BIT guard
   usually separating these paths is not visible in this excerpt.  */
5561 ix86_find_base_term (x)
5568       if (GET_CODE (x) != CONST)
5571       if (GET_CODE (term) == PLUS
5572 	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
5573 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5574 	term = XEXP (term, 0);
5575       if (GET_CODE (term) != UNSPEC
5576 	  || XINT (term, 1) != UNSPEC_GOTPCREL)
/* Extract the symbol/label inside the unspec vector.  */
5579       term = XVECEXP (term, 0, 0);
5581       if (GET_CODE (term) != SYMBOL_REF
5582 	  && GET_CODE (term) != LABEL_REF)
/* Fallback path: delegitimize and check the result is symbolic.  */
5588   term = ix86_delegitimize_address (x);
5590   if (GET_CODE (term) != SYMBOL_REF
5591       && GET_CODE (term) != LABEL_REF)
5597 /* Determine if a given RTX is a valid constant.  We already know this
5598    satisfies CONSTANT_P.  */
/* Rejects TLS symbols and TLS-offset CONSTs; permits only whitelisted
   unspecs inside CONST; everything else is handled by move patterns.  */
5601 legitimate_constant_p (x)
5606   switch (GET_CODE (x))
5609       /* TLS symbols are not constant.  */
5610       if (tls_symbolic_operand (x, Pmode))
5615       inner = XEXP (x, 0);
5617       /* Offsets of TLS symbols are never valid.
5618 	 Discourage CSE from creating them.  */
5619       if (GET_CODE (inner) == PLUS
5620 	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5623       /* Only some unspecs are valid as "constants".  */
5624       if (GET_CODE (inner) == UNSPEC)
5625 	switch (XINT (inner, 1))
5628 	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5638   /* Otherwise we handle everything else in the move patterns.  */
5642 /* Determine if it's legal to put X into the constant pool.  This
5643    is not possible for the address of thread-local symbols, which
5644    is checked above.  */
/* Target hook: forcing to constant memory is disallowed exactly when
   the RTX is not a legitimate constant (e.g. TLS addresses).  */
5647 ix86_cannot_force_const_mem (x)
5650   return !legitimate_constant_p (x);
5653 /* Determine if a given RTX is a valid constant address.  */
/* Switch on the RTX code; one visible arm is 64-bit-only, Mach-O
   trusts CONST outright, and the default defers to
   legitimate_constant_p when not compiling PIC.  */
5656 constant_address_p (x)
5659   switch (GET_CODE (x))
5666       return TARGET_64BIT;
5669       /* For Mach-O, really believe the CONST.  */
5672       /* Otherwise fall through.  */
5674       return !flag_pic && legitimate_constant_p (x);
5681 /* Nonzero if the constant value X is a legitimate general operand
5682    when generating PIC code.  It is given that flag_pic is on and
5683    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
/* CONST wrappers admit only whitelisted unspecs; symbolic operands are
   checked via legitimate_pic_address_disp_p.  */
5686 legitimate_pic_operand_p (x)
5691   switch (GET_CODE (x))
5694       inner = XEXP (x, 0);
5696       /* Only some unspecs are valid as "constants".  */
5697       if (GET_CODE (inner) == UNSPEC)
5698 	switch (XINT (inner, 1))
5701 	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5709       return legitimate_pic_address_disp_p (x);
5716 /* Determine if a given CONST RTX is a valid memory displacement
/* 64-bit code gets a direct-address fast path for small-PIC local
   symbols/labels; otherwise the displacement must be a CONST wrapping
   an approved UNSPEC (GOT/GOTOFF/TLS family), possibly plus a
   CONST_INT addend or a Mach-O picbase MINUS.  */
5720 legitimate_pic_address_disp_p (disp)
5725   /* In 64bit mode we can allow direct addresses of symbols and labels
5726      when they are not dynamic symbols.  */
5729       /* TLS references should always be enclosed in UNSPEC.  */
5730       if (tls_symbolic_operand (disp, GET_MODE (disp)))
/* Local (constant-pool or file-local) symbols are reachable directly
   under the small PIC code model.  */
5732       if (GET_CODE (disp) == SYMBOL_REF
5733 	  && ix86_cmodel == CM_SMALL_PIC
5734 	  && (CONSTANT_POOL_ADDRESS_P (disp)
5735 	      || SYMBOL_REF_FLAG (disp)))
5737       if (GET_CODE (disp) == LABEL_REF)
/* symbol+offset is accepted when the offset fits in +/-16MB (signed
   32-bit RIP-relative reach with margin).  */
5739       if (GET_CODE (disp) == CONST
5740 	  && GET_CODE (XEXP (disp, 0)) == PLUS
5741 	  && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5742 	       && ix86_cmodel == CM_SMALL_PIC
5743 	       && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5744 		   || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5745 	      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5746 	  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5747 	  && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5748 	  && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5751       if (GET_CODE (disp) != CONST)
5753       disp = XEXP (disp, 0);
/* 64-bit non-direct case: only a bare GOTPCREL unspec of a symbol or
   label is allowed -- no PLUS around it.  */
5757 	  /* We are unsafe to allow PLUS expressions.  This limit allowed distance
5758 	     of GOT tables.  We should not need these anyway.  */
5759 	  if (GET_CODE (disp) != UNSPEC
5760 	      || XINT (disp, 1) != UNSPEC_GOTPCREL)
5763 	  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5764 	      && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit: strip an optional CONST_INT addend before examining the
   unspec.  */
5770       if (GET_CODE (disp) == PLUS)
5772 	  if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5774 	  disp = XEXP (disp, 0);
5778   /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
5779   if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5781       if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5782           || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5783 	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5785 	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
5786 	    if (strstr (sym_name, "$pb") != 0)
5791   if (GET_CODE (disp) != UNSPEC)
/* Dispatch on the unspec kind: GOT entries need a SYMBOL_REF, GOTOFF
   needs a local symbol, and each TLS model checks its own operand
   predicate.  */
5794   switch (XINT (disp, 1))
5799       return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5801       return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5802     case UNSPEC_GOTTPOFF:
5803     case UNSPEC_GOTNTPOFF:
5804     case UNSPEC_INDNTPOFF:
5807       return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5809       return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5811       return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5817 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5818    memory address for an instruction.  The MODE argument is the machine mode
5819    for the MEM expression that wants to use this address.
5821    It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
5822    convert common non-canonical forms to canonical form so that they will
/* Validates base, index, scale, and displacement in turn; each failure
   sets REASON/REASON_RTX for the debug report and (presumably) jumps
   to a shared error label not fully visible in this excerpt.  */
5826 legitimate_address_p (mode, addr, strict)
5827      enum machine_mode mode;
5831   struct ix86_address parts;
5832   rtx base, index, disp;
5833   HOST_WIDE_INT scale;
5834   const char *reason = NULL;
5835   rtx reason_rtx = NULL_RTX;
5837   if (TARGET_DEBUG_ADDR)
5840 	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5841 	       GET_MODE_NAME (mode), strict);
/* A bare thread-pointer unspec is always a valid address.  */
5845   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5847       if (TARGET_DEBUG_ADDR)
5848 	fprintf (stderr, "Success.\n");
5852   if (ix86_decompose_address (addr, &parts) <= 0)
5854       reason = "decomposition failed";
5859   index = parts.index;
5861   scale = parts.scale;
5863   /* Validate base register.
5865      Don't allow SUBREG's here, it can lead to spill failures when the base
5866      is one word out of a two word structure, which is represented internally
/* Look through SUBREG for the class check but reject non-REG and
   wrong-mode bases outright.  */
5874       if (GET_CODE (base) == SUBREG)
5875 	reg = SUBREG_REG (base);
5879       if (GET_CODE (reg) != REG)
5881 	  reason = "base is not a register";
5885       if (GET_MODE (base) != Pmode)
5887 	  reason = "base is not in Pmode";
/* Strict checking (post-reload) requires a hard reg valid as a base;
   non-strict also accepts pseudos.  */
5891       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5892 	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5894 	  reason = "base is not valid";
5899   /* Validate index register.
5901      Don't allow SUBREG's here, it can lead to spill failures when the index
5902      is one word out of a two word structure, which is represented internally
5910       if (GET_CODE (index) == SUBREG)
5911 	reg = SUBREG_REG (index);
5915       if (GET_CODE (reg) != REG)
5917 	  reason = "index is not a register";
5921       if (GET_MODE (index) != Pmode)
5923 	  reason = "index is not in Pmode";
5927       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5928 	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5930 	  reason = "index is not valid";
5935   /* Validate scale factor.  */
/* A scale other than 1 needs an index register and must be 2, 4 or 8
   (the SIB encoding's only multipliers).  */
5938       reason_rtx = GEN_INT (scale);
5941 	  reason = "scale without index";
5945       if (scale != 2 && scale != 4 && scale != 8)
5947 	  reason = "scale is not a valid multiplier";
5952   /* Validate displacement.  */
/* CONST-wrapped unspecs: GOT-style ones are re-checked through the
   PIC path below; TLS ones are accepted per-model; anything else is
   an invalid address unspec.  */
5957       if (GET_CODE (disp) == CONST
5958 	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5959 	switch (XINT (XEXP (disp, 0), 1))
5963 	  case UNSPEC_GOTPCREL:
5966 	    goto is_legitimate_pic;
5968 	  case UNSPEC_GOTTPOFF:
5969 	  case UNSPEC_GOTNTPOFF:
5970 	  case UNSPEC_INDNTPOFF:
5976 	    reason = "invalid address unspec";
5980       else if (flag_pic && (SYMBOLIC_CONST (disp)
5982 			    && !machopic_operand_p (disp)
5987 	  if (TARGET_64BIT && (index || base))
5989 	      /* foo@dtpoff(%rX) is ok.  */
/* In 64-bit PIC, a symbolic displacement combined with registers is
   only allowed for DTPOFF/NTPOFF TLS offsets plus a constant.  */
5990 	      if (GET_CODE (disp) != CONST
5991 		  || GET_CODE (XEXP (disp, 0)) != PLUS
5992 		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5993 		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5994 		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5995 		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5997 		  reason = "non-constant pic memory reference";
6001 	  else if (! legitimate_pic_address_disp_p (disp))
6003 	      reason = "displacement is an invalid pic construct";
6007 	  /* This code used to verify that a symbolic pic displacement
6008 	     includes the pic_offset_table_rtx register.
6010 	     While this is good idea, unfortunately these constructs may
6011 	     be created by "adds using lea" optimization for incorrect
6020 	     This code is nonsensical, but results in addressing
6021 	     GOT table with pic_offset_table_rtx base.  We can't
6022 	     just refuse it easily, since it gets matched by
6023 	     "addsi3" pattern, that later gets split to lea in the
6024 	     case output register differs from input.  While this
6025 	     can be handled by separate addsi pattern for this case
6026 	     that never results in lea, this seems to be easier and
6027 	     correct fix for crash to disable this test.  */
6029       else if (!CONSTANT_ADDRESS_P (disp))
6031 	  reason = "displacement is not constant";
6034       else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6036 	  reason = "displacement is out of range";
6039       else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
6041 	  reason = "displacement is a const_double";
6046   /* Everything looks valid.  */
6047   if (TARGET_DEBUG_ADDR)
6048     fprintf (stderr, "Success.\n");
/* Shared failure path: report the collected reason when debugging.  */
6052   if (TARGET_DEBUG_ADDR)
6054       fprintf (stderr, "Error: %s\n", reason);
6055       debug_rtx (reason_rtx);
6060 /* Return an unique alias set for the GOT.  */
/* Lazily allocates the alias set on first call (sentinel -1) and
   caches it in a function-static for all later callers.  */
6062 static HOST_WIDE_INT
6063 ix86_GOT_alias_set ()
6065   static HOST_WIDE_INT set = -1;
6067     set = new_alias_set ();
6071 /* Return a legitimate reference for ORIG (an address) using the
6072 register REG. If REG is 0, a new pseudo is generated.
6074 There are two types of references that must be handled:
6076 1. Global data references must load the address from the GOT, via
6077 the PIC reg. An insn is emitted to do this load, and the reg is
6080 2. Static data references, constant pool addresses, and code labels
6081 compute the address as an offset from the GOT, whose base is in
6082 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
6083 differentiate them from global data objects. The returned
6084 address is the PIC reg + an unspec constant.
6086 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6087 reg also appears in the address. */
/* NOTE(review): this view of the function is elided -- parameter
   declarations, braces, and several statements are missing, so the
   comments below are limited to what the visible lines show.  */
6090 legitimize_pic_address (orig, reg)
6100 reg = gen_reg_rtx (Pmode);
6101 /* Use the generic Mach-O PIC machinery. */
6102 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6105 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6107 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6109 /* This symbol may be referenced via a displacement from the PIC
6110 base address (@GOTOFF). */
/* During reload no new pseudos may be created; make sure the PIC
   register is recorded as live when it is used here.  */
6112 if (reload_in_progress)
6113 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6114 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6115 new = gen_rtx_CONST (Pmode, new);
6116 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6120 emit_move_insn (reg, new);
6124 else if (GET_CODE (addr) == SYMBOL_REF)
/* Build a @GOTPCREL memory reference: the address is loaded from
   the GOT via a RIP-relative slot.  */
6128 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6129 new = gen_rtx_CONST (Pmode, new);
6130 new = gen_rtx_MEM (Pmode, new);
6131 RTX_UNCHANGING_P (new) = 1;
6132 set_mem_alias_set (new, ix86_GOT_alias_set ());
6135 reg = gen_reg_rtx (Pmode);
6136 /* Use directly gen_movsi, otherwise the address is loaded
6137 into register for CSE. We don't want to CSE these addresses,
6138 instead we CSE addresses from the GOT table, so skip this. */
6139 emit_insn (gen_movsi (reg, new));
6144 /* This symbol must be referenced via a load from the
6145 Global Offset Table (@GOT). */
6147 if (reload_in_progress)
6148 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6149 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6150 new = gen_rtx_CONST (Pmode, new);
6151 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6152 new = gen_rtx_MEM (Pmode, new);
6153 RTX_UNCHANGING_P (new) = 1;
6154 set_mem_alias_set (new, ix86_GOT_alias_set ());
6157 reg = gen_reg_rtx (Pmode);
6158 emit_move_insn (reg, new);
/* Strip the CONST wrapper to inspect the expression underneath.  */
6164 if (GET_CODE (addr) == CONST)
6166 addr = XEXP (addr, 0);
6168 /* We must match stuff we generate before. Assume the only
6169 unspecs that can get here are ours. Not that we could do
6170 anything with them anyway... */
6171 if (GET_CODE (addr) == UNSPEC
6172 || (GET_CODE (addr) == PLUS
6173 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6175 if (GET_CODE (addr) != PLUS)
6178 if (GET_CODE (addr) == PLUS)
6180 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6182 /* Check first to see if this is a constant offset from a @GOTOFF
6183 symbol reference. */
6184 if (local_symbolic_operand (op0, Pmode)
6185 && GET_CODE (op1) == CONST_INT)
6189 if (reload_in_progress)
6190 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6191 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6193 new = gen_rtx_PLUS (Pmode, new, op1);
6194 new = gen_rtx_CONST (Pmode, new);
6195 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6199 emit_move_insn (reg, new);
/* Offsets outside +-16MB cannot stay in the displacement; force
   the offset into a register instead.  */
6205 if (INTVAL (op1) < -16*1024*1024
6206 || INTVAL (op1) >= 16*1024*1024)
6207 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
/* Otherwise legitimize both halves of the PLUS recursively and
   recombine, folding a constant term when one appears.  */
6212 base = legitimize_pic_address (XEXP (addr, 0), reg);
6213 new = legitimize_pic_address (XEXP (addr, 1),
6214 base == reg ? NULL_RTX : reg);
6216 if (GET_CODE (new) == CONST_INT)
6217 new = plus_constant (base, INTVAL (new));
6220 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6222 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6223 new = XEXP (new, 1);
6225 new = gen_rtx_PLUS (Pmode, base, new);
/* Record target-specific information about DECL's section/visibility
   into its SYMBOL_REF: mark locally-binding symbols, and encode the
   TLS model of thread-local variables into the symbol name.
   NOTE(review): this view is elided -- the return type, braces, the
   enclosing switch, and several statements are not visible here.  */
6234 ix86_encode_section_info (decl, first)
6236 int first ATTRIBUTE_UNUSED;
6238 bool local_p = (*targetm.binds_local_p) (decl);
/* Constants use TREE_CST_RTL; declarations use DECL_RTL.  */
6241 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
6242 if (GET_CODE (rtl) != MEM)
6244 symbol = XEXP (rtl, 0);
6245 if (GET_CODE (symbol) != SYMBOL_REF)
6248 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6249 symbol so that we may access it directly in the GOT. */
6252 SYMBOL_REF_FLAG (symbol) = local_p;
6254 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6255 "local dynamic", "initial exec" or "local exec" TLS models
6258 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
6260 const char *symbol_str;
6263 enum tls_model kind = decl_tls_model (decl);
6265 if (TARGET_64BIT && ! flag_pic)
6267 /* x86-64 doesn't allow non-pic code for shared libraries,
6268 so don't generate GD/LD TLS models for non-pic code. */
6271 case TLS_MODEL_GLOBAL_DYNAMIC:
6272 kind = TLS_MODEL_INITIAL_EXEC; break;
6273 case TLS_MODEL_LOCAL_DYNAMIC:
6274 kind = TLS_MODEL_LOCAL_EXEC; break;
6280 symbol_str = XSTR (symbol, 0);
/* Already encoded?  Bail out if the model matches (elided branch).  */
6282 if (symbol_str[0] == '%')
6284 if (symbol_str[1] == tls_model_chars[kind])
/* Prepend "%<model-char>" to the symbol name; +1 keeps the NUL.  */
6288 len = strlen (symbol_str) + 1;
6289 newstr = alloca (len + 2);
6292 newstr[1] = tls_model_chars[kind];
6293 memcpy (newstr + 2, symbol_str, len);
6295 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
6299 /* Undo the above when printing symbol names. */
/* NOTE(review): the entire body of this function is elided in this
   view; only the name line is visible.  Presumably it skips the
   "%<model-char>" prefix added by ix86_encode_section_info -- verify
   against the full source.  */
6302 ix86_strip_name_encoding (str)
6312 /* Load the thread pointer into a register. */
/* NOTE(review): return type, braces and the return statement are
   elided from this view.  */
6315 get_thread_pointer ()
/* The thread pointer is modeled as a load from an UNSPEC_TP address;
   it shares the GOT alias set and is marked unchanging.  */
6319 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6320 tp = gen_rtx_MEM (Pmode, tp);
6321 RTX_UNCHANGING_P (tp) = 1;
6322 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6323 tp = force_reg (Pmode, tp);
6328 /* Try machine-dependent ways of modifying an illegitimate address
6329 to be legitimate. If we find one, return the new, valid address.
6330 This macro is used in only one place: `memory_address' in explow.c.
6332 OLDX is the address as it was before break_out_memory_refs was called.
6333 In some cases it is useful to look at this to decide what needs to be done.
6335 MODE and WIN are passed so that this macro can use
6336 GO_IF_LEGITIMATE_ADDRESS.
6338 It is always safe for this macro to do nothing. It exists to recognize
6339 opportunities to optimize the output.
6341 For the 80386, we handle X+REG by loading X into a register R and
6342 using R+REG. R will go in a general reg and indexing will be used.
6343 However, if REG is a broken-out memory address or multiplication,
6344 nothing needs to be done because REG can certainly go in a general reg.
6346 When -fpic is used, special handling is needed for symbolic references.
6347 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): this view of the function is elided -- the first
   parameter declaration, braces, switch headers and several statements
   are missing; comments below describe only the visible lines.  */
6350 legitimize_address (x, oldx, mode)
6352 register rtx oldx ATTRIBUTE_UNUSED;
6353 enum machine_mode mode;
6358 if (TARGET_DEBUG_ADDR)
6360 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6361 GET_MODE_NAME (mode));
/* TLS symbols are dispatched on their access model (switch elided).  */
6365 log = tls_symbolic_operand (x, mode);
6368 rtx dest, base, off, pic;
6373 case TLS_MODEL_GLOBAL_DYNAMIC:
6374 dest = gen_reg_rtx (Pmode);
6377 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6380 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6381 insns = get_insns ();
6384 emit_libcall_block (insns, dest, rax, x);
6387 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6390 case TLS_MODEL_LOCAL_DYNAMIC:
6391 base = gen_reg_rtx (Pmode);
6394 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6397 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6398 insns = get_insns ();
6401 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6402 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6403 emit_libcall_block (insns, base, rax, note);
6406 emit_insn (gen_tls_local_dynamic_base_32 (base));
/* LD result = module base + DTP-relative offset of X.  */
6408 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6409 off = gen_rtx_CONST (Pmode, off);
6411 return gen_rtx_PLUS (Pmode, base, off);
6413 case TLS_MODEL_INITIAL_EXEC:
6417 type = UNSPEC_GOTNTPOFF;
6421 if (reload_in_progress)
6422 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6423 pic = pic_offset_table_rtx;
6424 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6426 else if (!TARGET_GNU_TLS)
6428 pic = gen_reg_rtx (Pmode);
6429 emit_insn (gen_set_got (pic));
6430 type = UNSPEC_GOTTPOFF;
6435 type = UNSPEC_INDNTPOFF;
6438 base = get_thread_pointer ();
/* Load the TP-relative offset of X from its GOT slot.  */
6440 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6441 off = gen_rtx_CONST (Pmode, off);
6443 off = gen_rtx_PLUS (Pmode, pic, off);
6444 off = gen_rtx_MEM (Pmode, off);
6445 RTX_UNCHANGING_P (off) = 1;
6446 set_mem_alias_set (off, ix86_GOT_alias_set ());
6447 dest = gen_reg_rtx (Pmode);
6449 if (TARGET_64BIT || TARGET_GNU_TLS)
6451 emit_move_insn (dest, off);
6452 return gen_rtx_PLUS (Pmode, base, dest);
/* Sun-style TLS: offset is subtracted from the thread pointer.  */
6455 emit_insn (gen_subsi3 (dest, base, off));
6458 case TLS_MODEL_LOCAL_EXEC:
6459 base = get_thread_pointer ();
6461 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6462 (TARGET_64BIT || TARGET_GNU_TLS)
6463 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6464 off = gen_rtx_CONST (Pmode, off);
6466 if (TARGET_64BIT || TARGET_GNU_TLS)
6467 return gen_rtx_PLUS (Pmode, base, off);
6470 dest = gen_reg_rtx (Pmode);
6471 emit_insn (gen_subsi3 (dest, base, off));
/* Non-TLS symbolic addresses under PIC go through the PIC machinery.  */
6482 if (flag_pic && SYMBOLIC_CONST (x))
6483 return legitimize_pic_address (x, 0);
6485 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6486 if (GET_CODE (x) == ASHIFT
6487 && GET_CODE (XEXP (x, 1)) == CONST_INT
6488 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6491 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6492 GEN_INT (1 << log));
6495 if (GET_CODE (x) == PLUS)
6497 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6499 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6500 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6501 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6504 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6505 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6506 GEN_INT (1 << log));
6509 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6510 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6511 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6514 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6515 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6516 GEN_INT (1 << log));
6519 /* Put multiply first if it isn't already. */
6520 if (GET_CODE (XEXP (x, 1)) == MULT)
6522 rtx tmp = XEXP (x, 0);
6523 XEXP (x, 0) = XEXP (x, 1);
6528 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6529 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6530 created by virtual register instantiation, register elimination, and
6531 similar optimizations. */
6532 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6535 x = gen_rtx_PLUS (Pmode,
6536 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6537 XEXP (XEXP (x, 1), 0)),
6538 XEXP (XEXP (x, 1), 1));
6542 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6543 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6544 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6545 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6546 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6547 && CONSTANT_P (XEXP (x, 1)))
6550 rtx other = NULL_RTX;
/* Pick whichever of the two constant positions is the CONST_INT;
   the other operand is folded via plus_constant below.  */
6552 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6554 constant = XEXP (x, 1);
6555 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6557 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6559 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6560 other = XEXP (x, 1);
6568 x = gen_rtx_PLUS (Pmode,
6569 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6570 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6571 plus_constant (other, INTVAL (constant)));
6575 if (changed && legitimate_address_p (mode, x, FALSE))
/* Force non-indexable subexpressions into registers.  */
6578 if (GET_CODE (XEXP (x, 0)) == MULT)
6581 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6584 if (GET_CODE (XEXP (x, 1)) == MULT)
6587 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6591 && GET_CODE (XEXP (x, 1)) == REG
6592 && GET_CODE (XEXP (x, 0)) == REG)
6595 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6598 x = legitimize_pic_address (x, 0);
6601 if (changed && legitimate_address_p (mode, x, FALSE))
6604 if (GET_CODE (XEXP (x, 0)) == REG)
6606 register rtx temp = gen_reg_rtx (Pmode);
6607 register rtx val = force_operand (XEXP (x, 1), temp);
6609 emit_move_insn (temp, val);
6615 else if (GET_CODE (XEXP (x, 1)) == REG)
6617 register rtx temp = gen_reg_rtx (Pmode);
6618 register rtx val = force_operand (XEXP (x, 0), temp);
6620 emit_move_insn (temp, val);
6630 /* Print an integer constant expression in assembler syntax. Addition
6631 and subtraction are the only arithmetic that may appear in these
6632 expressions. FILE is the stdio stream to write to, X is the rtx, and
6633 CODE is the operand print code from the output string. */
/* NOTE(review): parameter declarations, braces, and many case labels
   are elided from this view; the comments below cover only what is
   visible.  */
6636 output_pic_addr_const (file, x, code)
6643 switch (GET_CODE (x))
6653 assemble_name (file, XSTR (x, 0));
/* Calls through the PLT: SYMBOL_REF_FLAG marks a local symbol, which
   does not need the @PLT suffix.  */
6654 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6655 fputs ("@PLT", file);
6662 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6663 assemble_name (asm_out_file, buf);
6667 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6671 /* This used to output parentheses around the expression,
6672 but that does not work on the 386 (either ATT or BSD assembler). */
6673 output_pic_addr_const (file, XEXP (x, 0), code);
6677 if (GET_MODE (x) == VOIDmode)
6679 /* We can use %d if the number is <32 bits and positive. */
6680 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6681 fprintf (file, "0x%lx%08lx",
6682 (unsigned long) CONST_DOUBLE_HIGH (x),
6683 (unsigned long) CONST_DOUBLE_LOW (x));
6685 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6688 /* We can't handle floating point constants;
6689 PRINT_OPERAND must handle them. */
6690 output_operand_lossage ("floating constant misused");
6694 /* Some assemblers need integer constants to appear first. */
6695 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6697 output_pic_addr_const (file, XEXP (x, 0), code);
6699 output_pic_addr_const (file, XEXP (x, 1), code);
6701 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6703 output_pic_addr_const (file, XEXP (x, 1), code);
6705 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS is bracketed; the bracket style differs by dialect.  */
6713 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6714 output_pic_addr_const (file, XEXP (x, 0), code);
6716 output_pic_addr_const (file, XEXP (x, 1), code);
6718 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* Our PIC/TLS unspecs all wrap exactly one operand.  */
6722 if (XVECLEN (x, 0) != 1)
6724 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6725 switch (XINT (x, 1))
6728 fputs ("@GOT", file);
6731 fputs ("@GOTOFF", file);
6733 case UNSPEC_GOTPCREL:
6734 fputs ("@GOTPCREL(%rip)", file);
6736 case UNSPEC_GOTTPOFF:
6737 /* FIXME: This might be @TPOFF in Sun ld too. */
6738 fputs ("@GOTTPOFF", file);
6741 fputs ("@TPOFF", file);
6745 fputs ("@TPOFF", file);
6747 fputs ("@NTPOFF", file);
6750 fputs ("@DTPOFF", file);
6752 case UNSPEC_GOTNTPOFF:
6754 fputs ("@GOTTPOFF(%rip)", file);
6756 fputs ("@GOTNTPOFF", file);
6758 case UNSPEC_INDNTPOFF:
6759 fputs ("@INDNTPOFF", file);
6762 output_operand_lossage ("invalid UNSPEC as operand");
6768 output_operand_lossage ("invalid expression as operand");
6772 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6773 We need to handle our special PIC relocations. */
/* NOTE(review): braces and the surrounding conditional structure are
   elided from this view.  Emits a pointer-sized directive, then the
   constant -- through the PIC printer when PIC relocations may appear
   in X, plain output_addr_const otherwise.  */
6776 i386_dwarf_output_addr_const (file, x)
6781 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6785 fprintf (file, "%s", ASM_LONG);
6788 output_pic_addr_const (file, x, '\0');
6790 output_addr_const (file, x);
6794 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6795 We need to emit DTP-relative relocations. */
/* NOTE(review): parameter declarations, braces, and the size switch
   are elided; the ", 0" appears to pad the upper half for the 8-byte
   case -- confirm against the full source.  */
6798 i386_output_dwarf_dtprel (file, size, x)
6803 fputs (ASM_LONG, file);
6804 output_addr_const (file, x);
6805 fputs ("@DTPOFF", file);
6811 fputs (", 0", file);
6818 /* In the name of slightly smaller debug output, and to cater to
6819 general assembler lossage, recognize PIC+GOTOFF and turn it back
6820 into a direct symbol reference. */
/* NOTE(review): return type, braces, and several statements are
   elided from this view; comments below cover the visible lines.  */
6823 ix86_delegitimize_address (orig_x)
6828 if (GET_CODE (x) == MEM)
/* 64-bit: only a (mem (const (unspec GOTPCREL))) can be undone;
   the result is the symbol inside the unspec.  */
6833 if (GET_CODE (x) != CONST
6834 || GET_CODE (XEXP (x, 0)) != UNSPEC
6835 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6836 || GET_CODE (orig_x) != MEM)
6838 return XVECEXP (XEXP (x, 0), 0, 0);
6841 if (GET_CODE (x) != PLUS
6842 || GET_CODE (XEXP (x, 1)) != CONST)
6845 if (GET_CODE (XEXP (x, 0)) == REG
6846 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6847 /* %ebx + GOT/GOTOFF */
6849 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6851 /* %ebx + %reg * scale + GOT/GOTOFF */
6853 if (GET_CODE (XEXP (y, 0)) == REG
6854 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6856 else if (GET_CODE (XEXP (y, 1)) == REG
6857 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
/* The remaining term must be an index expression (reg or scaled
   reg) for the form to be recognizable.  */
6861 if (GET_CODE (y) != REG
6862 && GET_CODE (y) != MULT
6863 && GET_CODE (y) != ASHIFT)
6869 x = XEXP (XEXP (x, 1), 0);
/* @GOT references only occur inside a MEM; @GOTOFF only outside.  */
6870 if (GET_CODE (x) == UNSPEC
6871 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6872 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6875 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6876 return XVECEXP (x, 0, 0);
/* Same, but with a constant addend folded into the unspec.  */
6879 if (GET_CODE (x) == PLUS
6880 && GET_CODE (XEXP (x, 0)) == UNSPEC
6881 && GET_CODE (XEXP (x, 1)) == CONST_INT
6882 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6883 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6884 && GET_CODE (orig_x) != MEM)))
6886 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6888 return gen_rtx_PLUS (Pmode, y, x);
/* Write the instruction-suffix mnemonic for condition CODE under CC
   mode MODE to FILE.  REVERSE inverts the condition; FP selects the
   fcmov-style spelling.  NOTE(review): return type, most parameter
   declarations, braces, and many case labels are elided here.  */
6896 put_condition_code (code, mode, reverse, fp, file)
6898 enum machine_mode mode;
6904 if (mode == CCFPmode || mode == CCFPUmode)
6906 enum rtx_code second_code, bypass_code;
/* FP comparisons that need a second/bypass test cannot be expressed
   as a single suffix.  */
6907 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6908 if (bypass_code != NIL || second_code != NIL)
6910 code = ix86_fp_compare_code_to_integer (code);
6914 code = reverse_condition (code);
6925 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6930 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6931 Those same assemblers have the same but opposite lossage on cmov. */
6934 suffix = fp ? "nbe" : "a";
6937 if (mode == CCNOmode || mode == CCGOCmode)
6939 else if (mode == CCmode || mode == CCGCmode)
6950 if (mode == CCNOmode || mode == CCGOCmode)
6952 else if (mode == CCmode || mode == CCGCmode)
6961 suffix = fp ? "nb" : "ae";
6964 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6974 suffix = fp ? "u" : "p";
6977 suffix = fp ? "nu" : "np";
6982 fputs (suffix, file);
/* Print the assembler name of register X to FILE; CODE selects an
   explicit size/name variant ('b','w','k','q','y','h').
   NOTE(review): parameter declarations, braces, and several switch
   labels are elided from this view.  */
6986 print_reg (x, code, file)
/* These pseudo/condition registers should never reach assembly output.  */
6991 if (REGNO (x) == ARG_POINTER_REGNUM
6992 || REGNO (x) == FRAME_POINTER_REGNUM
6993 || REGNO (x) == FLAGS_REG
6994 || REGNO (x) == FPSR_REG)
6997 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Translate the print code into a byte size (CODE reused as size).  */
7000 if (code == 'w' || MMX_REG_P (x))
7002 else if (code == 'b')
7004 else if (code == 'k')
7006 else if (code == 'q')
7008 else if (code == 'y')
7010 else if (code == 'h')
7013 code = GET_MODE_SIZE (GET_MODE (x));
7015 /* Irritatingly, AMD extended registers use a different naming convention
7016 from the normal registers. */
7017 if (REX_INT_REG_P (x))
7024 error ("extended registers have no high halves");
7027 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7030 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7033 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7036 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7039 error ("unsupported operand size for extended register");
7047 if (STACK_TOP_P (x))
7049 fputs ("st(0)", file);
/* Integer regs of size 4/8 get the 'e'/'r' name prefix.  */
7056 if (! ANY_FP_REG_P (x))
7057 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7061 fputs (hi_reg_name[REGNO (x)], file);
7064 fputs (qi_reg_name[REGNO (x)], file);
7067 fputs (qi_high_reg_name[REGNO (x)], file);
7074 /* Locate some local-dynamic symbol still in use by this function
7075 so that we can print its name in some tls_local_dynamic_base
/* NOTE(review): return type, braces, and part of the loop condition
   are elided from this view.  The result is cached in
   cfun->machine->some_ld_name, filled in by the for_each_rtx
   callback below.  */
7079 get_some_local_dynamic_name ()
7083 if (cfun->machine->some_ld_name)
7084 return cfun->machine->some_ld_name;
7086 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7088 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7089 return cfun->machine->some_ld_name;
/* for_each_rtx callback: record the first local-dynamic SYMBOL_REF
   found into cfun->machine->some_ld_name.  NOTE(review): return type,
   the first parameter declaration, braces, and the return statements
   are elided from this view.  */
7095 get_some_local_dynamic_name_1 (px, data)
7097 void *data ATTRIBUTE_UNUSED;
7101 if (GET_CODE (x) == SYMBOL_REF
7102 && local_dynamic_symbolic_operand (x, Pmode))
7104 cfun->machine->some_ld_name = XSTR (x, 0);
7112 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7113 C -- print opcode suffix for set/cmov insn.
7114 c -- like C, but print reversed condition
7115 F,f -- likewise, but for floating-point.
7116 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
7118 R -- print the prefix for register names.
7119 z -- print the opcode suffix for the size of the current operand.
7120 * -- print a star (in certain assembler syntax)
7121 A -- print an absolute memory reference.
7122 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7123 s -- print a shift double count, followed by the assembler's argument
7125 b -- print the QImode name of the register for the indicated operand.
7126 %b0 would print %al if operands[0] is reg 0.
7127 w -- likewise, print the HImode name of the register.
7128 k -- likewise, print the SImode name of the register.
7129 q -- likewise, print the DImode name of the register.
7130 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7131 y -- print "st(0)" instead of "st" as a register.
7132 D -- print condition for SSE cmp instruction.
7133 P -- if PIC, print an @PLT suffix.
7134 X -- don't print any sort of PIC '@' suffix for a symbol.
7135 & -- print some in-use local-dynamic symbol name.
/* NOTE(review): this view of the function is elided -- parameter
   declarations, braces, case labels, and many statements are missing;
   the comments below only describe the visible lines.  */
7139 print_operand (file, x, code)
7149 if (ASSEMBLER_DIALECT == ASM_ATT)
7154 assemble_name (file, get_some_local_dynamic_name ());
7158 if (ASSEMBLER_DIALECT == ASM_ATT)
7160 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7162 /* Intel syntax. For absolute addresses, registers should not
7163 be surrounded by braces. */
7164 if (GET_CODE (x) != REG)
7167 PRINT_OPERAND (file, x, 0);
7175 PRINT_OPERAND (file, x, 0);
/* Size-suffix codes ('L','W','B','Q','S','T'): AT&T prints the
   explicit letter, Intel omits it (elided branches).  */
7180 if (ASSEMBLER_DIALECT == ASM_ATT)
7185 if (ASSEMBLER_DIALECT == ASM_ATT)
7190 if (ASSEMBLER_DIALECT == ASM_ATT)
7195 if (ASSEMBLER_DIALECT == ASM_ATT)
7200 if (ASSEMBLER_DIALECT == ASM_ATT)
7205 if (ASSEMBLER_DIALECT == ASM_ATT)
7210 /* 387 opcodes don't get size suffixes if the operands are
7212 if (STACK_REG_P (x))
7215 /* Likewise if using Intel opcodes. */
7216 if (ASSEMBLER_DIALECT == ASM_INTEL)
7219 /* This is the size of op from size of operand. */
7220 switch (GET_MODE_SIZE (GET_MODE (x)))
7223 #ifdef HAVE_GAS_FILDS_FISTS
7229 if (GET_MODE (x) == SFmode)
7244 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7246 #ifdef GAS_MNEMONICS
7272 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7274 PRINT_OPERAND (file, x, 0);
7280 /* Little bit of braindamage here. The SSE compare instructions
7281 use completely different names for the comparisons than the
7282 fp conditional moves. */
7283 switch (GET_CODE (x))
7298 fputs ("unord", file);
7302 fputs ("neq", file);
7306 fputs ("nlt", file);
7310 fputs ("nle", file);
7313 fputs ("ord", file);
7321 #ifdef CMOV_SUN_AS_SYNTAX
7322 if (ASSEMBLER_DIALECT == ASM_ATT)
7324 switch (GET_MODE (x))
7326 case HImode: putc ('w', file); break;
7328 case SFmode: putc ('l', file); break;
7330 case DFmode: putc ('q', file); break;
7338 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7341 #ifdef CMOV_SUN_AS_SYNTAX
7342 if (ASSEMBLER_DIALECT == ASM_ATT)
7345 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7348 /* Like above, but reverse condition */
7350 /* Check to see if argument to %c is really a constant
7351 and not a condition code which needs to be reversed. */
7352 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7354 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7357 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7360 #ifdef CMOV_SUN_AS_SYNTAX
7361 if (ASSEMBLER_DIALECT == ASM_ATT)
7364 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* Branch prediction hint prefixes, only when the profile disagrees
   with the CPU's static forward/backward heuristic.  */
7370 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7373 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7376 int pred_val = INTVAL (XEXP (x, 0));
7378 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7379 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7381 int taken = pred_val > REG_BR_PROB_BASE / 2;
7382 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7384 /* Emit hints only in the case default branch prediction
7385 heuristics would fail. */
7386 if (taken != cputaken)
7388 /* We use 3e (DS) prefix for taken branches and
7389 2e (CS) prefix for not taken branches. */
7391 fputs ("ds ; ", file);
7393 fputs ("cs ; ", file);
7400 output_operand_lossage ("invalid operand code `%c'", code);
7404 if (GET_CODE (x) == REG)
7406 PRINT_REG (x, code, file);
7409 else if (GET_CODE (x) == MEM)
7411 /* No `byte ptr' prefix for call instructions. */
7412 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7415 switch (GET_MODE_SIZE (GET_MODE (x)))
7417 case 1: size = "BYTE"; break;
7418 case 2: size = "WORD"; break;
7419 case 4: size = "DWORD"; break;
7420 case 8: size = "QWORD"; break;
7421 case 12: size = "XWORD"; break;
7422 case 16: size = "XMMWORD"; break;
7427 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7430 else if (code == 'w')
7432 else if (code == 'k')
7436 fputs (" PTR ", file);
7440 if (flag_pic && CONSTANT_ADDRESS_P (x))
7441 output_pic_addr_const (file, x, code);
7442 /* Avoid (%rip) for call operands. */
7443 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7444 && GET_CODE (x) != CONST_INT)
7445 output_addr_const (file, x);
7446 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7447 output_operand_lossage ("invalid constraints for operand");
/* SFmode immediates are emitted as their 32-bit target image in hex.  */
7452 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7457 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7458 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7460 if (ASSEMBLER_DIALECT == ASM_ATT)
7462 fprintf (file, "0x%lx", l);
7465 /* These float cases don't actually occur as immediate operands. */
7466 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7470 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7471 fprintf (file, "%s", dstr);
7474 else if (GET_CODE (x) == CONST_DOUBLE
7475 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7479 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7480 fprintf (file, "%s", dstr);
/* Default: immediates get the '$' (AT&T) or "OFFSET FLAT:" (Intel)
   prefix unless suppressed by code 'X'/'P' (elided checks).  */
7487 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7489 if (ASSEMBLER_DIALECT == ASM_ATT)
7492 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7493 || GET_CODE (x) == LABEL_REF)
7495 if (ASSEMBLER_DIALECT == ASM_ATT)
7498 fputs ("OFFSET FLAT:", file);
7501 if (GET_CODE (x) == CONST_INT)
7502 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7504 output_pic_addr_const (file, x, code);
7506 output_addr_const (file, x);
7510 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): parameter declarations, braces, and a number of
   statements are elided from this view; the comments below describe
   only the visible lines.  */
7513 print_operand_address (file, addr)
7517 struct ix86_address parts;
7518 rtx base, index, disp;
/* Raw thread-pointer reference: printed as a zero offset in the
   %fs (32-bit) or %gs segment.  */
7521 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7523 if (ASSEMBLER_DIALECT == ASM_INTEL)
7524 fputs ("DWORD PTR ", file);
7525 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7528 fputs ("fs:0", file);
7530 fputs ("gs:0", file);
7534 if (! ix86_decompose_address (addr, &parts))
7538 index = parts.index;
7540 scale = parts.scale;
7542 if (!base && !index)
7544 /* Displacement only requires special attention. */
7546 if (GET_CODE (disp) == CONST_INT)
7548 if (ASSEMBLER_DIALECT == ASM_INTEL)
7550 if (USER_LABEL_PREFIX[0] == 0)
7552 fputs ("ds:", file);
7554 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7557 output_pic_addr_const (file, addr, 0);
7559 output_addr_const (file, addr);
7561 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7563 && ((GET_CODE (addr) == SYMBOL_REF
7564 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7565 || GET_CODE (addr) == LABEL_REF
7566 || (GET_CODE (addr) == CONST
7567 && GET_CODE (XEXP (addr, 0)) == PLUS
7568 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7569 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7570 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7571 fputs ("(%rip)", file);
/* AT&T syntax: disp(base,index,scale).  */
7575 if (ASSEMBLER_DIALECT == ASM_ATT)
7580 output_pic_addr_const (file, disp, 0);
7581 else if (GET_CODE (disp) == LABEL_REF)
7582 output_asm_label (disp);
7584 output_addr_const (file, disp);
7589 PRINT_REG (base, 0, file);
7593 PRINT_REG (index, 0, file);
7595 fprintf (file, ",%d", scale);
/* Intel syntax: sym[base+index*scale+offset].  */
7601 rtx offset = NULL_RTX;
7605 /* Pull out the offset of a symbol; print any symbol itself. */
7606 if (GET_CODE (disp) == CONST
7607 && GET_CODE (XEXP (disp, 0)) == PLUS
7608 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7610 offset = XEXP (XEXP (disp, 0), 1);
7611 disp = gen_rtx_CONST (VOIDmode,
7612 XEXP (XEXP (disp, 0), 0));
7616 output_pic_addr_const (file, disp, 0);
7617 else if (GET_CODE (disp) == LABEL_REF)
7618 output_asm_label (disp);
7619 else if (GET_CODE (disp) == CONST_INT)
7622 output_addr_const (file, disp);
7628 PRINT_REG (base, 0, file);
7631 if (INTVAL (offset) >= 0)
7633 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7637 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7644 PRINT_REG (index, 0, file);
7646 fprintf (file, "*%d", scale);
/* Target hook for output_addr_const: print the TLS relocation
   suffix for one of our UNSPEC wrappers; reject anything else.
   NOTE(review): return type, braces, break statements, and a few
   case labels are elided from this view.  */
7654 output_addr_const_extra (file, x)
7660 if (GET_CODE (x) != UNSPEC)
7663 op = XVECEXP (x, 0, 0);
7664 switch (XINT (x, 1))
7666 case UNSPEC_GOTTPOFF:
7667 output_addr_const (file, op);
7668 /* FIXME: This might be @TPOFF in Sun ld. */
7669 fputs ("@GOTTPOFF", file);
7672 output_addr_const (file, op);
7673 fputs ("@TPOFF", file);
7676 output_addr_const (file, op);
7678 fputs ("@TPOFF", file);
7680 fputs ("@NTPOFF", file);
7683 output_addr_const (file, op);
7684 fputs ("@DTPOFF", file);
7686 case UNSPEC_GOTNTPOFF:
7687 output_addr_const (file, op);
7689 fputs ("@GOTTPOFF(%rip)", file);
7691 fputs ("@GOTNTPOFF", file);
7693 case UNSPEC_INDNTPOFF:
7694 output_addr_const (file, op);
7695 fputs ("@INDNTPOFF", file);
7705 /* Split one or more DImode RTL references into pairs of SImode
7706 references. The RTL can be REG, offsettable MEM, integer constant, or
7707 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7708 split and "num" is its length. lo_half and hi_half are output arrays
7709 that parallel "operands". */
/* NOTE(review): parameter declarations, braces, and the loop header
   are elided from this view; NUM is apparently reused as the loop
   index over the operand array.  */
7712 split_di (operands, num, lo_half, hi_half)
7715 rtx lo_half[], hi_half[];
7719 rtx op = operands[num];
7721 /* simplify_subreg refuses to split volatile memory addresses,
7722 but we still have to handle it. */
7723 if (GET_CODE (op) == MEM)
7725 lo_half[num] = adjust_address (op, SImode, 0);
7726 hi_half[num] = adjust_address (op, SImode, 4);
/* VOIDmode covers CONST_INT/CONST_DOUBLE constants; treat them as
   DImode when taking the subreg.  */
7730 lo_half[num] = simplify_gen_subreg (SImode, op,
7731 GET_MODE (op) == VOIDmode
7732 ? DImode : GET_MODE (op), 0);
7733 hi_half[num] = simplify_gen_subreg (SImode, op,
7734 GET_MODE (op) == VOIDmode
7735 ? DImode : GET_MODE (op), 4);
7739 /* Split one or more TImode RTL references into pairs of SImode
7740 references. The RTL can be REG, offsettable MEM, integer constant, or
7741 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7742 split and "num" is its length. lo_half and hi_half are output arrays
7743 that parallel "operands". */
/* NOTE(review): despite the comment above, the visible code splits
   TImode into DImode halves (offsets 0 and 8), not SImode -- the
   header comment appears to be a stale copy from split_di.  Braces
   and the loop header are elided from this view.  */
7746 split_ti (operands, num, lo_half, hi_half)
7749 rtx lo_half[], hi_half[];
7753 rtx op = operands[num];
7755 /* simplify_subreg refuses to split volatile memory addresses, but we
7756 still have to handle it. */
7757 if (GET_CODE (op) == MEM)
7759 lo_half[num] = adjust_address (op, DImode, 0);
7760 hi_half[num] = adjust_address (op, DImode, 8);
7764 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7765 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7770 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7771 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7772 is the expression of the binary operation. The output may either be
7773 emitted here, or returned to the caller, like all output_* functions.
7775 There is no guarantee that the operands are the same mode, as they
7776 might be within FLOAT or FLOAT_EXTEND expressions. */
7778 #ifndef SYSV386_COMPAT
7779 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7780 wants to fix the assemblers because that causes incompatibility
7781 with gcc. No-one wants to fix gcc because that causes
7782 incompatibility with assemblers... You can use the option of
7783 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7784 #define SYSV386_COMPAT 1
/* NOTE(review): lossy extraction -- the function header, switch labels,
   else-arms and closing braces between the numbered lines are missing;
   code kept byte-identical.  Builds the mnemonic into a static buffer
   and picks an operand-order template P depending on SSE vs. x87,
   whether the dying operand is popped, and SYSV386_COMPAT quirks.  */
7788 output_387_binary_op (insn, operands)
7792 static char buf[30];
7795 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7797 #ifdef ENABLE_CHECKING
7798 /* Even if we do not want to check the inputs, this documents input
7799 constraints. Which helps in understanding the following code. */
7800 if (STACK_REG_P (operands[0])
7801 && ((REG_P (operands[1])
7802 && REGNO (operands[0]) == REGNO (operands[1])
7803 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7804 || (REG_P (operands[2])
7805 && REGNO (operands[0]) == REGNO (operands[2])
7806 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7807 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* Choose the base mnemonic from the rtx code of operands[3]; integer-mode
   inputs select the fi* (integer operand) variants -- presumably, the
   elided arms set `buf` accordingly; TODO confirm against full source.  */
7813 switch (GET_CODE (operands[3]))
7816 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7817 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7825 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7826 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7834 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7835 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7843 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7844 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: suffix ss/sd by scalar mode, two-operand form.  */
7858 if (GET_MODE (operands[0]) == SFmode)
7859 strcat (buf, "ss\t{%2, %0|%0, %2}");
7861 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: select operand-order/pop template per operation class.  */
7866 switch (GET_CODE (operands[3]))
7870 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7872 rtx temp = operands[2];
7873 operands[2] = operands[1];
7877 /* know operands[0] == operands[1]. */
7879 if (GET_CODE (operands[2]) == MEM)
7885 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7887 if (STACK_TOP_P (operands[0]))
7888 /* How is it that we are storing to a dead operand[2]?
7889 Well, presumably operands[1] is dead too. We can't
7890 store the result to st(0) as st(0) gets popped on this
7891 instruction. Instead store to operands[2] (which I
7892 think has to be st(1)). st(1) will be popped later.
7893 gcc <= 2.8.1 didn't have this check and generated
7894 assembly code that the Unixware assembler rejected. */
7895 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7897 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7901 if (STACK_TOP_P (operands[0]))
7902 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7904 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7909 if (GET_CODE (operands[1]) == MEM)
7915 if (GET_CODE (operands[2]) == MEM)
7921 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7924 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7925 derived assemblers, confusingly reverse the direction of
7926 the operation for fsub{r} and fdiv{r} when the
7927 destination register is not st(0). The Intel assembler
7928 doesn't have this brain damage. Read !SYSV386_COMPAT to
7929 figure out what the hardware really does. */
7930 if (STACK_TOP_P (operands[0]))
7931 p = "{p\t%0, %2|rp\t%2, %0}";
7933 p = "{rp\t%2, %0|p\t%0, %2}";
7935 if (STACK_TOP_P (operands[0]))
7936 /* As above for fmul/fadd, we can't store to st(0). */
7937 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7939 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7944 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7947 if (STACK_TOP_P (operands[0]))
7948 p = "{rp\t%0, %1|p\t%1, %0}";
7950 p = "{p\t%1, %0|rp\t%0, %1}";
7952 if (STACK_TOP_P (operands[0]))
7953 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7955 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7960 if (STACK_TOP_P (operands[0]))
7962 if (STACK_TOP_P (operands[1]))
7963 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7965 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7968 else if (STACK_TOP_P (operands[1]))
7971 p = "{\t%1, %0|r\t%0, %1}";
7973 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7979 p = "{r\t%2, %0|\t%0, %2}";
7981 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7994 /* Output code to initialize control word copies used by
7995 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7996 is set to control word rounding downwards. */
/* NOTE(review): fragmentary extraction -- function body braces and the
   second half of the `if` condition (line 8006) are missing.  Stores the
   current x87 control word with fnstcw, then ORs in 0x0c00 (round-toward-
   zero bits) either via movsi_insv_1 or iorhi3.  */
7998 emit_i387_cw_initialization (normal, round_down)
7999 rtx normal, round_down;
8001 rtx reg = gen_reg_rtx (HImode);
8003 emit_insn (gen_x86_fnstcw_1 (normal));
8004 emit_move_insn (reg, normal);
8005 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8007 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8009 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8010 emit_move_insn (round_down, reg);
8013 /* Output code for INSN to convert a float to a signed int. OPERANDS
8014 are the insn operands. The output may be [HSD]Imode and the input
8015 operand may be [SDX]Fmode. */
/* NOTE(review): fragmentary extraction -- abort() arms and closing
   braces are elided.  Switches the rounding mode with fldcw %3, emits
   fistp (popping) when the stack top dies or the result is DImode, else
   fist, then restores the control word from %2.  */
8018 output_fix_trunc (insn, operands)
8022 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8023 int dimode_p = GET_MODE (operands[0]) == DImode;
8025 /* Jump through a hoop or two for DImode, since the hardware has no
8026 non-popping instruction. We used to do this a different way, but
8027 that was somewhat fragile and broke with post-reload splitters. */
8028 if (dimode_p && !stack_top_dies)
8029 output_asm_insn ("fld\t%y1", operands);
8031 if (!STACK_TOP_P (operands[1]))
8034 if (GET_CODE (operands[0]) != MEM)
8037 output_asm_insn ("fldcw\t%3", operands);
8038 if (stack_top_dies || dimode_p)
8039 output_asm_insn ("fistp%z0\t%0", operands);
8041 output_asm_insn ("fist%z0\t%0", operands);
8042 output_asm_insn ("fldcw\t%2", operands);
8047 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8048 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8049 when fucom should be used. */
/* NOTE(review): fragmentary extraction -- declarations, else-arms and
   the table-lookup return at the end are elided.  Emits SSE
   [u]comiss/[u]comisd directly; for x87, handles the double-pop
   (fcompp/fucompp) case specially, otherwise indexes a template table
   `alt` by a 4-bit mask built from eflags_p/int-mode/unordered_p/
   stack_top_dies.  */
8052 output_fp_compare (insn, operands, eflags_p, unordered_p)
8055 int eflags_p, unordered_p;
8058 rtx cmp_op0 = operands[0];
8059 rtx cmp_op1 = operands[1];
8060 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8065 cmp_op1 = operands[2];
8069 if (GET_MODE (operands[0]) == SFmode)
8071 return "ucomiss\t{%1, %0|%0, %1}";
8073 return "comiss\t{%1, %0|%0, %1}";
8076 return "ucomisd\t{%1, %0|%0, %1}";
8078 return "comisd\t{%1, %0|%0, %1}";
8081 if (! STACK_TOP_P (cmp_op0))
8084 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8086 if (STACK_REG_P (cmp_op1)
8088 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8089 && REGNO (cmp_op1) != FIRST_STACK_REG)
8091 /* If both the top of the 387 stack dies, and the other operand
8092 is also a stack register that dies, then this must be a
8093 `fcompp' float compare */
8097 /* There is no double popping fcomi variant. Fortunately,
8098 eflags is immune from the fstp's cc clobbering. */
8100 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8102 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8110 return "fucompp\n\tfnstsw\t%0";
8112 return "fcompp\n\tfnstsw\t%0";
8125 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8127 static const char * const alt[24] =
8139 "fcomi\t{%y1, %0|%0, %y1}",
8140 "fcomip\t{%y1, %0|%0, %y1}",
8141 "fucomi\t{%y1, %0|%0, %y1}",
8142 "fucomip\t{%y1, %0|%0, %y1}",
8149 "fcom%z2\t%y2\n\tfnstsw\t%0",
8150 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8151 "fucom%z2\t%y2\n\tfnstsw\t%0",
8152 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8154 "ficom%z2\t%y2\n\tfnstsw\t%0",
8155 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8163 mask = eflags_p << 3;
8164 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8165 mask |= unordered_p << 1;
8166 mask |= stack_top_dies;
/* Emit one jump-table (addr_vec) element: ASM_LONG (or ASM_QUAD in the
   elided 64-bit branch) followed by the local label LPREFIX<value>.
   NOTE(review): fragmentary extraction; surrounding braces elided.  */
8179 ix86_output_addr_vec_elt (file, value)
8183 const char *directive = ASM_LONG;
8188 directive = ASM_QUAD;
8194 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one relative jump-table (addr_diff_vec) element.  Depending on
   target/PIC flavor (conditions elided by the extraction): a plain
   label difference, an @GOTOFF reference, a Mach-O pic-base-relative
   difference, or a GOT-symbol-relative expression.  */
8198 ix86_output_addr_diff_elt (file, value, rel)
8203 fprintf (file, "%s%s%d-%s%d\n",
8204 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8205 else if (HAVE_AS_GOTOFF_IN_DATA)
8206 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8208 else if (TARGET_MACHO)
8209 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8210 machopic_function_base_name () + 1);
8213 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8214 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8217 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* NOTE(review): fragmentary -- the abort on !reload_completed and the
   final emit are elided.  Widens sub-SImode destinations to SImode to
   avoid the 16-bit prefix, then emits set-to-zero, as a PARALLEL with a
   flags clobber when the xor form will be used.  */
8221 ix86_expand_clear (dest)
8226 /* We play register width games, which are only valid after reload. */
8227 if (!reload_completed)
8230 /* Avoid HImode and its attendant prefix byte. */
8231 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8232 dest = gen_rtx_REG (SImode, REGNO (dest));
8234 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8236 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8237 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8239 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8240 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8246 /* X is an unchanging MEM. If it is a constant pool reference, return
8247 the constant pool rtx, else NULL. */
/* Delegitimizes the address first so PIC-wrapped pool references are
   recognized.  NOTE(review): the NULL-returning fall-through line is
   elided by the extraction.  */
8250 maybe_get_pool_constant (x)
8253 x = ix86_delegitimize_address (XEXP (x, 0));
8255 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8256 return get_pool_constant (x);
/* Expand a MODE move of operands[1] into operands[0], legitimizing TLS
   and PIC symbol references, forcing mem->mem moves through a register,
   and pushing large 64-bit immediates / FP constants to memory when
   profitable.  NOTE(review): fragmentary extraction -- operand setup,
   several else-arms, emit calls and closing braces are elided between
   the numbered lines; code kept byte-identical.  */
8262 ix86_expand_move (mode, operands)
8263 enum machine_mode mode;
8266 int strict = (reload_in_progress || reload_completed);
8267 rtx insn, op0, op1, tmp;
8272 if (tls_symbolic_operand (op1, Pmode))
8274 op1 = legitimize_address (op1, op1, VOIDmode);
8275 if (GET_CODE (op0) == MEM)
8277 tmp = gen_reg_rtx (mode);
8278 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
8282 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8287 rtx temp = ((reload_in_progress
8288 || ((op0 && GET_CODE (op0) == REG)
8290 ? op0 : gen_reg_rtx (Pmode));
8291 op1 = machopic_indirect_data_reference (op1, temp);
8292 op1 = machopic_legitimize_pic_address (op1, mode,
8293 temp == op1 ? 0 : temp);
8297 if (MACHOPIC_INDIRECT)
8298 op1 = machopic_indirect_data_reference (op1, 0);
8302 insn = gen_rtx_SET (VOIDmode, op0, op1);
8306 #endif /* TARGET_MACHO */
8307 if (GET_CODE (op0) == MEM)
8308 op1 = force_reg (Pmode, op1);
8312 if (GET_CODE (temp) != REG)
8313 temp = gen_reg_rtx (Pmode);
8314 temp = legitimize_pic_address (op1, temp);
8322 if (GET_CODE (op0) == MEM
8323 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8324 || !push_operand (op0, mode))
8325 && GET_CODE (op1) == MEM)
8326 op1 = force_reg (mode, op1);
8328 if (push_operand (op0, mode)
8329 && ! general_no_elim_operand (op1, mode))
8330 op1 = copy_to_mode_reg (mode, op1);
8332 /* Force large constants in 64bit compilation into register
8333 to get them CSEed. */
8334 if (TARGET_64BIT && mode == DImode
8335 && immediate_operand (op1, mode)
8336 && !x86_64_zero_extended_value (op1)
8337 && !register_operand (op0, mode)
8338 && optimize && !reload_completed && !reload_in_progress)
8339 op1 = copy_to_mode_reg (mode, op1);
8341 if (FLOAT_MODE_P (mode))
8343 /* If we are loading a floating point constant to a register,
8344 force the value to memory now, since we'll get better code
8345 out the back end. */
8349 else if (GET_CODE (op1) == CONST_DOUBLE
8350 && register_operand (op0, mode))
8351 op1 = validize_mem (force_const_mem (mode, op1));
8355 insn = gen_rtx_SET (VOIDmode, op0, op1);
/* Expand a vector-mode move: non-zero constants are spilled to the
   constant pool, mem->mem moves go through a register, then a plain SET
   is emitted.  NOTE(review): fragmentary extraction -- parameter
   declarations and one condition line (8375) are elided.  */
8361 ix86_expand_vector_move (mode, operands)
8362 enum machine_mode mode;
8365 /* Force constants other than zero into memory. We do not know how
8366 the instructions used to build constants modify the upper 64 bits
8367 of the register, once we have that information we may be able
8368 to handle some of them more efficiently. */
8369 if ((reload_in_progress | reload_completed) == 0
8370 && register_operand (operands[0], mode)
8371 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8372 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8374 /* Make operand1 a register if it isn't already. */
8376 && !register_operand (operands[0], mode)
8377 && !register_operand (operands[1], mode))
8379 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8380 emit_move_insn (operands[0], temp);
8384 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8387 /* Attempt to expand a binary operator. Make the expansion closer to the
8388 actual machine, then just general_operand, which will allow 3 separate
8389 memory references (one output, two input) in a single insn. */
/* NOTE(review): fragmentary extraction -- operand unpacking, the
   commutative-swap body, and the non-clobber emit path are elided;
   code kept byte-identical.  Canonicalizes operands (swap for
   commutativity, force memory/constants into registers), then emits the
   SET, adding a flags-register clobber except during reload-for-PLUS.  */
8392 ix86_expand_binary_operator (code, mode, operands)
8394 enum machine_mode mode;
8397 int matching_memory;
8398 rtx src1, src2, dst, op, clob;
8404 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8405 if (GET_RTX_CLASS (code) == 'c'
8406 && (rtx_equal_p (dst, src2)
8407 || immediate_operand (src1, mode)))
8414 /* If the destination is memory, and we do not have matching source
8415 operands, do things in registers. */
8416 matching_memory = 0;
8417 if (GET_CODE (dst) == MEM)
8419 if (rtx_equal_p (dst, src1))
8420 matching_memory = 1;
8421 else if (GET_RTX_CLASS (code) == 'c'
8422 && rtx_equal_p (dst, src2))
8423 matching_memory = 2;
8425 dst = gen_reg_rtx (mode);
8428 /* Both source operands cannot be in memory. */
8429 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8431 if (matching_memory != 2)
8432 src2 = force_reg (mode, src2);
8434 src1 = force_reg (mode, src1);
8437 /* If the operation is not commutable, source 1 cannot be a constant
8438 or non-matching memory. */
8439 if ((CONSTANT_P (src1)
8440 || (!matching_memory && GET_CODE (src1) == MEM))
8441 && GET_RTX_CLASS (code) != 'c')
8442 src1 = force_reg (mode, src1);
8444 /* If optimizing, copy to regs to improve CSE */
8445 if (optimize && ! no_new_pseudos)
8447 if (GET_CODE (dst) == MEM)
8448 dst = gen_reg_rtx (mode);
8449 if (GET_CODE (src1) == MEM)
8450 src1 = force_reg (mode, src1);
8451 if (GET_CODE (src2) == MEM)
8452 src2 = force_reg (mode, src2);
8455 /* Emit the instruction. */
8457 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8458 if (reload_in_progress)
8460 /* Reload doesn't know about the flags register, and doesn't know that
8461 it doesn't want to clobber it. We can only do this with PLUS. */
8468 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8469 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8472 /* Fix up the destination if needed. */
8473 if (dst != operands[0])
8474 emit_move_insn (operands[0], dst);
8477 /* Return TRUE or FALSE depending on whether the binary operator meets the
8478 appropriate constraints. */
/* NOTE(review): the FALSE/TRUE return statements between the checks are
   elided by the extraction; only the guard conditions remain.  */
8481 ix86_binary_operator_ok (code, mode, operands)
8483 enum machine_mode mode ATTRIBUTE_UNUSED;
8486 /* Both source operands cannot be in memory. */
8487 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8489 /* If the operation is not commutable, source 1 cannot be a constant. */
8490 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8492 /* If the destination is memory, we must have a matching source operand. */
8493 if (GET_CODE (operands[0]) == MEM
8494 && ! (rtx_equal_p (operands[0], operands[1])
8495 || (GET_RTX_CLASS (code) == 'c'
8496 && rtx_equal_p (operands[0], operands[2]))))
8498 /* If the operation is not commutable and the source 1 is memory, we must
8499 have a matching destination. */
8500 if (GET_CODE (operands[1]) == MEM
8501 && GET_RTX_CLASS (code) != 'c'
8502 && ! rtx_equal_p (operands[0], operands[1]))
8507 /* Attempt to expand a unary operator. Make the expansion closer to the
8508 actual machine, then just general_operand, which will allow 2 separate
8509 memory references (one output, one input) in a single insn. */
/* NOTE(review): fragmentary -- operand unpacking and the non-clobber
   emit path are elided.  Mirrors ix86_expand_binary_operator for the
   one-source case; NOT never clobbers flags, hence the
   `reload_in_progress || code == NOT` test below.  */
8512 ix86_expand_unary_operator (code, mode, operands)
8514 enum machine_mode mode;
8517 int matching_memory;
8518 rtx src, dst, op, clob;
8523 /* If the destination is memory, and we do not have matching source
8524 operands, do things in registers. */
8525 matching_memory = 0;
8526 if (GET_CODE (dst) == MEM)
8528 if (rtx_equal_p (dst, src))
8529 matching_memory = 1;
8531 dst = gen_reg_rtx (mode);
8534 /* When source operand is memory, destination must match. */
8535 if (!matching_memory && GET_CODE (src) == MEM)
8536 src = force_reg (mode, src);
8538 /* If optimizing, copy to regs to improve CSE */
8539 if (optimize && ! no_new_pseudos)
8541 if (GET_CODE (dst) == MEM)
8542 dst = gen_reg_rtx (mode);
8543 if (GET_CODE (src) == MEM)
8544 src = force_reg (mode, src);
8547 /* Emit the instruction. */
8549 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8550 if (reload_in_progress || code == NOT)
8552 /* Reload doesn't know about the flags register, and doesn't know that
8553 it doesn't want to clobber it. */
8560 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8561 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8564 /* Fix up the destination if needed. */
8565 if (dst != operands[0])
8566 emit_move_insn (operands[0], dst);
8569 /* Return TRUE or FALSE depending on whether the unary operator meets the
8570 appropriate constraints. */
/* NOTE(review): the return statements are elided by the extraction.  */
8573 ix86_unary_operator_ok (code, mode, operands)
8574 enum rtx_code code ATTRIBUTE_UNUSED;
8575 enum machine_mode mode ATTRIBUTE_UNUSED;
8576 rtx operands[2] ATTRIBUTE_UNUSED;
8578 /* If one of operands is memory, source and destination must match. */
8579 if ((GET_CODE (operands[0]) == MEM
8580 || GET_CODE (operands[1]) == MEM)
8581 && ! rtx_equal_p (operands[0], operands[1]))
8586 /* Return TRUE or FALSE depending on whether the first SET in INSN
8587 has source and destination with matching CC modes, and that the
8588 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): the switch labels and per-mode return statements are
   elided; the visible `if`s are the per-CC-mode compatibility checks.  */
8591 ix86_match_ccmode (insn, req_mode)
8593 enum machine_mode req_mode;
8596 enum machine_mode set_mode;
8598 set = PATTERN (insn);
8599 if (GET_CODE (set) == PARALLEL)
8600 set = XVECEXP (set, 0, 0);
8601 if (GET_CODE (set) != SET)
8603 if (GET_CODE (SET_SRC (set)) != COMPARE)
8606 set_mode = GET_MODE (SET_DEST (set));
8610 if (req_mode != CCNOmode
8611 && (req_mode != CCmode
8612 || XEXP (SET_SRC (set), 1) != const0_rtx))
8616 if (req_mode == CCGCmode)
8620 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8624 if (req_mode == CCZmode)
8634 return (GET_MODE (SET_SRC (set)) == set_mode);
8637 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emits (set flags (compare op0 op1)) in the CC mode chosen by
   SELECT_CC_MODE and returns the (code flags 0) rtx for the flags user.  */
8640 ix86_expand_int_compare (code, op0, op1)
8644 enum machine_mode cmpmode;
8647 cmpmode = SELECT_CC_MODE (code, op0, op1);
8648 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8650 /* This is very simple, but making the interface the same as in the
8651 FP case makes the rest of the code easier. */
8652 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8653 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8655 /* Return the test that should be put into the flags user, i.e.
8656 the bcc, scc, or cmov instruction. */
8657 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8660 /* Figure out whether to use ordered or unordered fp comparisons.
8661 Return the appropriate mode to use. */
8664 ix86_fp_compare_mode (code)
8665 enum rtx_code code ATTRIBUTE_UNUSED;
8667 /* ??? In order to make all comparisons reversible, we do all comparisons
8668 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8669 all forms trapping and nontrapping comparisons, we can make inequality
8670 comparisons trapping again, since it results in better code when using
8671 FCOM based compares. */
8672 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode for comparing OP0 with OP1 under rtx code CODE:
   FP modes go through ix86_fp_compare_mode; for integers the mode is
   chosen by which flag bits (ZF/CF/SF/OF) the code needs.
   NOTE(review): switch header, return statements and default arm are
   elided by the extraction.  */
8676 ix86_cc_mode (code, op0, op1)
8680 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8681 return ix86_fp_compare_mode (code);
8684 /* Only zero flag is needed. */
8686 case NE: /* ZF!=0 */
8688 /* Codes needing carry flag. */
8689 case GEU: /* CF=0 */
8690 case GTU: /* CF=0 & ZF=0 */
8691 case LTU: /* CF=1 */
8692 case LEU: /* CF=1 | ZF=1 */
8694 /* Codes possibly doable only with sign flag when
8695 comparing against zero. */
8696 case GE: /* SF=OF or SF=0 */
8697 case LT: /* SF<>OF or SF=1 */
8698 if (op1 == const0_rtx)
8701 /* For other cases Carry flag is not required. */
8703 /* Codes doable only with sign flag when comparing
8704 against zero, but we miss jump instruction for it
8705 so we need to use relational tests against overflow
8706 that thus needs to be zero. */
8707 case GT: /* ZF=0 & SF=OF */
8708 case LE: /* ZF=1 | SF<>OF */
8709 if (op1 == const0_rtx)
8713 /* strcmp pattern do (use flags) and combine may ask us for proper
8722 /* Return true if we should use an FCOMI instruction for this fp comparison. */
/* True when the fcomi sequence is already the cheapest for CODE or its
   swapped form (so swapping cannot beat it).  */
8725 ix86_use_fcomi_compare (code)
8726 enum rtx_code code ATTRIBUTE_UNUSED;
8728 enum rtx_code swapped_code = swap_condition (code);
8729 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8730 || (ix86_fp_comparison_cost (swapped_code)
8731 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8734 /* Swap, force into registers, or otherwise massage the two operands
8735 to a fp comparison. The operands are updated in place; the new
8736 comparison code is returned. */
/* NOTE(review): fragmentary extraction -- the SSE branch, several else
   arms, the final *pop0/*pop1 stores and the return are elided; code
   kept byte-identical.  */
8738 static enum rtx_code
8739 ix86_prepare_fp_compare_args (code, pop0, pop1)
8743 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8744 rtx op0 = *pop0, op1 = *pop1;
8745 enum machine_mode op_mode = GET_MODE (op0);
8746 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8748 /* All of the unordered compare instructions only work on registers.
8749 The same is true of the XFmode compare instructions. The same is
8750 true of the fcomi compare instructions. */
8753 && (fpcmp_mode == CCFPUmode
8754 || op_mode == XFmode
8755 || op_mode == TFmode
8756 || ix86_use_fcomi_compare (code)))
8758 op0 = force_reg (op_mode, op0);
8759 op1 = force_reg (op_mode, op1);
8763 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8764 things around if they appear profitable, otherwise force op0
8767 if (standard_80387_constant_p (op0) == 0
8768 || (GET_CODE (op0) == MEM
8769 && ! (standard_80387_constant_p (op1) == 0
8770 || GET_CODE (op1) == MEM)))
8773 tmp = op0, op0 = op1, op1 = tmp;
8774 code = swap_condition (code);
8777 if (GET_CODE (op0) != REG)
8778 op0 = force_reg (op_mode, op0);
8780 if (CONSTANT_P (op1))
8782 if (standard_80387_constant_p (op1))
8783 op1 = force_reg (op_mode, op1);
8785 op1 = validize_mem (force_const_mem (op_mode, op1));
8789 /* Try to rearrange the comparison to make it cheaper. */
8790 if (ix86_fp_comparison_cost (code)
8791 > ix86_fp_comparison_cost (swap_condition (code))
8792 && (GET_CODE (op1) == REG || !no_new_pseudos))
8795 tmp = op0, op0 = op1, op1 = tmp;
8796 code = swap_condition (code);
8797 if (GET_CODE (op0) != REG)
8798 op0 = force_reg (op_mode, op0);
8806 /* Convert comparison codes we use to represent FP comparison to integer
8807 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the entire switch body (original lines 8811-8839) is
   elided by the extraction; only the header survives.  */
8809 static enum rtx_code
8810 ix86_fp_compare_code_to_integer (code)
8840 /* Split comparison code CODE into comparisons we can do using branch
8841 instructions. BYPASS_CODE is comparison code for branch that will
8842 branch around FIRST_CODE and SECOND_CODE. If some of branches
8843 is not required, set value to NIL.
8844 We never require more than two branches. */
/* NOTE(review): fragmentary -- initializations, most *first_code
   assignments, break statements and the !TARGET_IEEE_FP fixup body are
   elided.  The comments annotate how each FP code maps onto the
   unsigned-style flag tests fcomi produces.  */
8846 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8847 enum rtx_code code, *bypass_code, *first_code, *second_code;
8853 /* The fcomi comparison sets flags as follows:
8863 case GT: /* GTU - CF=0 & ZF=0 */
8864 case GE: /* GEU - CF=0 */
8865 case ORDERED: /* PF=0 */
8866 case UNORDERED: /* PF=1 */
8867 case UNEQ: /* EQ - ZF=1 */
8868 case UNLT: /* LTU - CF=1 */
8869 case UNLE: /* LEU - CF=1 | ZF=1 */
8870 case LTGT: /* EQ - ZF=0 */
8872 case LT: /* LTU - CF=1 - fails on unordered */
8874 *bypass_code = UNORDERED;
8876 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8878 *bypass_code = UNORDERED;
8880 case EQ: /* EQ - ZF=1 - fails on unordered */
8882 *bypass_code = UNORDERED;
8884 case NE: /* NE - ZF=0 - fails on unordered */
8886 *second_code = UNORDERED;
8888 case UNGE: /* GEU - CF=0 - fails on unordered */
8890 *second_code = UNORDERED;
8892 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8894 *second_code = UNORDERED;
8899 if (!TARGET_IEEE_FP)
8906 /* Return cost of comparison done fcom + arithmetics operations on AX.
8907 All following functions do use number of instructions as a cost metrics.
8908 In future this should be tweaked to compute bytes for optimize_size and
8909 take into account performance of various instructions on various CPUs. */
/* NOTE(review): the per-code switch (original lines 8917-8943) is
   elided; only the !TARGET_IEEE_FP early path remains visible.  */
8911 ix86_fp_comparison_arithmetics_cost (code)
8914 if (!TARGET_IEEE_FP)
8916 /* The cost of code output by ix86_expand_fp_compare. */
8944 /* Return cost of comparison done using fcomi operation.
8945 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Cost = 2 (fcomi + one branch) plus 1 if an extra bypass/second branch
   is needed.  NOTE(review): the TARGET_CMOVE guard body is elided.  */
8947 ix86_fp_comparison_fcomi_cost (code)
8950 enum rtx_code bypass_code, first_code, second_code;
8951 /* Return arbitrarily high cost when instruction is not supported - this
8952 prevents gcc from using it. */
8955 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8956 return (bypass_code != NIL || second_code != NIL) + 2;
8959 /* Return cost of comparison done using sahf operation.
8960 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Cost = 3 (fnstsw + sahf + branch) plus 1 for an extra branch;
   NOTE(review): the high-cost return on the guard is elided.  */
8962 ix86_fp_comparison_sahf_cost (code)
8965 enum rtx_code bypass_code, first_code, second_code;
8966 /* Return arbitrarily high cost when instruction is not preferred - this
8967 avoids gcc from using it. */
8968 if (!TARGET_USE_SAHF && !optimize_size)
8970 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8971 return (bypass_code != NIL || second_code != NIL) + 3;
8974 /* Compute cost of the comparison done using any method.
8975 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Minimum over the arithmetics, sahf and fcomi strategies.
   NOTE(review): the min-update assignments and return are elided.  */
8977 ix86_fp_comparison_cost (code)
8980 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8983 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8984 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8986 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8987 if (min > sahf_cost)
8989 if (min > fcomi_cost)
8994 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* NOTE(review): fragmentary extraction -- switch labels, else arms and
   several emit paths are elided; code kept byte-identical.  Three
   strategies: fcomi (direct flags), fnstsw+sahf, or fnstsw plus bit
   twiddling on AH (test/and/xor/cmp against masks 0x45/0x44/0x40/0x05/
   0x04/0x01 covering the C0/C2/C3 condition bits) when TARGET_IEEE_FP
   requires distinguishing NaNs.  */
8997 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8999 rtx op0, op1, scratch;
9003 enum machine_mode fpcmp_mode, intcmp_mode;
9005 int cost = ix86_fp_comparison_cost (code);
9006 enum rtx_code bypass_code, first_code, second_code;
9008 fpcmp_mode = ix86_fp_compare_mode (code);
9009 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9012 *second_test = NULL_RTX;
9014 *bypass_test = NULL_RTX;
9016 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9018 /* Do fcomi/sahf based test when profitable. */
9019 if ((bypass_code == NIL || bypass_test)
9020 && (second_code == NIL || second_test)
9021 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9025 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9026 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9032 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9033 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9035 scratch = gen_reg_rtx (HImode);
9036 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9037 emit_insn (gen_x86_sahf_1 (scratch));
9040 /* The FP codes work out to act like unsigned. */
9041 intcmp_mode = fpcmp_mode;
9043 if (bypass_code != NIL)
9044 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9045 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9047 if (second_code != NIL)
9048 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9049 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9054 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9055 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9056 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9058 scratch = gen_reg_rtx (HImode);
9059 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9061 /* In the unordered case, we have to check C2 for NaN's, which
9062 doesn't happen to work out to anything nice combination-wise.
9063 So do some bit twiddling on the value we've got in AH to come
9064 up with an appropriate set of condition codes. */
9066 intcmp_mode = CCNOmode;
9071 if (code == GT || !TARGET_IEEE_FP)
9073 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9078 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9079 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9080 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9081 intcmp_mode = CCmode;
9087 if (code == LT && TARGET_IEEE_FP)
9089 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9090 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9091 intcmp_mode = CCmode;
9096 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9102 if (code == GE || !TARGET_IEEE_FP)
9104 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9109 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9110 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9117 if (code == LE && TARGET_IEEE_FP)
9119 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9120 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9121 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9122 intcmp_mode = CCmode;
9127 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9133 if (code == EQ && TARGET_IEEE_FP)
9135 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9136 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9137 intcmp_mode = CCmode;
9142 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9149 if (code == NE && TARGET_IEEE_FP)
9151 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9152 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9158 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9164 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9168 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9177 /* Return the test that should be put into the flags user, i.e.
9178 the bcc, scc, or cmov instruction. */
9179 return gen_rtx_fmt_ee (code, VOIDmode,
9180 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Dispatch a compare of the global ix86_compare_op0/op1 to the FP or
   integer expander and return the flags-user rtx.  NOTE(review):
   declarations and the return statement are elided.  */
9185 ix86_expand_compare (code, second_test, bypass_test)
9187 rtx *second_test, *bypass_test;
9190 op0 = ix86_compare_op0;
9191 op1 = ix86_compare_op1;
9194 *second_test = NULL_RTX;
9196 *bypass_test = NULL_RTX;
9198 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9199 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9200 second_test, bypass_test);
9202 ret = ix86_expand_int_compare (code, op0, op1);
9207 /* Return true if the CODE will result in nontrivial jump sequence. */
/* Nontrivial = the split requires a bypass or second branch.  */
9209 ix86_fp_jump_nontrivial_p (code)
9212 enum rtx_code bypass_code, first_code, second_code;
9215 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9216 return bypass_code != NIL || second_code != NIL;
/* Expand a conditional branch on ix86_compare_op0/op1 to LABEL.
   Dispatches on the operand mode: simple compare+jump for the
   flag-register-friendly modes, a compound FP jump insn when the FP
   split needs multiple branches, and a multi-compare decomposition for
   DImode (hi/lo word pairs).  NOTE(review): fragmentary extraction --
   switch labels, abort arms, brace pairs and several statements are
   elided between the numbered lines; code kept byte-identical.  */
9220 ix86_expand_branch (code, label)
9226 switch (GET_MODE (ix86_compare_op0))
9232 tmp = ix86_expand_compare (code, NULL, NULL);
9233 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9234 gen_rtx_LABEL_REF (VOIDmode, label),
9236 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9246 enum rtx_code bypass_code, first_code, second_code;
9248 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9251 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9253 /* Check whether we will use the natural sequence with one jump. If
9254 so, we can expand jump early. Otherwise delay expansion by
9255 creating compound insn to not confuse optimizers. */
9256 if (bypass_code == NIL && second_code == NIL
9259 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9260 gen_rtx_LABEL_REF (VOIDmode, label),
9265 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9266 ix86_compare_op0, ix86_compare_op1);
9267 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9268 gen_rtx_LABEL_REF (VOIDmode, label),
9270 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9272 use_fcomi = ix86_use_fcomi_compare (code);
9273 vec = rtvec_alloc (3 + !use_fcomi);
9274 RTVEC_ELT (vec, 0) = tmp;
9276 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9278 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9281 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9283 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9291 /* Expand DImode branch into multiple compare+branch. */
9293 rtx lo[2], hi[2], label2;
9294 enum rtx_code code1, code2, code3;
9296 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9298 tmp = ix86_compare_op0;
9299 ix86_compare_op0 = ix86_compare_op1;
9300 ix86_compare_op1 = tmp;
9301 code = swap_condition (code);
9303 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9304 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9306 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9307 avoid two branches. This costs one extra insn, so disable when
9308 optimizing for size. */
9310 if ((code == EQ || code == NE)
9312 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9317 if (hi[1] != const0_rtx)
9318 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9319 NULL_RTX, 0, OPTAB_WIDEN);
9322 if (lo[1] != const0_rtx)
9323 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9324 NULL_RTX, 0, OPTAB_WIDEN);
9326 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9327 NULL_RTX, 0, OPTAB_WIDEN);
9329 ix86_compare_op0 = tmp;
9330 ix86_compare_op1 = const0_rtx;
9331 ix86_expand_branch (code, label);
9335 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9336 op1 is a constant and the low word is zero, then we can just
9337 examine the high word. */
9339 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9342 case LT: case LTU: case GE: case GEU:
9343 ix86_compare_op0 = hi[0];
9344 ix86_compare_op1 = hi[1];
9345 ix86_expand_branch (code, label);
9351 /* Otherwise, we need two or three jumps. */
9353 label2 = gen_label_rtx ();
9356 code2 = swap_condition (code);
9357 code3 = unsigned_condition (code);
9361 case LT: case GT: case LTU: case GTU:
9364 case LE: code1 = LT; code2 = GT; break;
9365 case GE: code1 = GT; code2 = LT; break;
9366 case LEU: code1 = LTU; code2 = GTU; break;
9367 case GEU: code1 = GTU; code2 = LTU; break;
9369 case EQ: code1 = NIL; code2 = NE; break;
9370 case NE: code2 = NIL; break;
9378 * if (hi(a) < hi(b)) goto true;
9379 * if (hi(a) > hi(b)) goto false;
9380 * if (lo(a) < lo(b)) goto true;
9384 ix86_compare_op0 = hi[0];
9385 ix86_compare_op1 = hi[1];
9388 ix86_expand_branch (code1, label);
9390 ix86_expand_branch (code2, label2);
9392 ix86_compare_op0 = lo[0];
9393 ix86_compare_op1 = lo[1];
9394 ix86_expand_branch (code3, label);
9397 emit_label (label2);
9406 /* Split branch based on floating point condition. */
/* NOTE(review): sampled listing -- interior lines missing; comments only.

   ix86_split_fp_branch -- emit the concrete jump sequence for a floating
   point branch: up to three jump insns (bypass jump for unordered
   operands, the main jump, and a second jump), each optionally tagged
   with a REG_BR_PROB note.  TARGET1/TARGET2 are the taken/fallthrough
   destinations (pc_rtx meaning fallthrough); if TARGET2 is not pc_rtx
   the condition is reversed (reverse_condition_maybe_unordered) so the
   main jump can target TARGET2's role.  TMP is a scratch passed through
   to ix86_expand_fp_compare.  */
9408 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9410 rtx op1, op2, target1, target2, tmp;
9413 rtx label = NULL_RTX;
9415 int bypass_probability = -1, second_probability = -1, probability = -1;
9418 if (target2 != pc_rtx)
9421 code = reverse_condition_maybe_unordered (code);
9426 condition = ix86_expand_fp_compare (code, op1, op2,
9427 tmp, &second, &bypass);
/* Distribute the incoming split_branch_probability over the emitted
   jumps; bypass/second get a token probability of 1 (see comment).  */
9429 if (split_branch_probability >= 0)
9431 /* Distribute the probabilities across the jumps.
9432 Assume the BYPASS and SECOND to be always test
9434 probability = split_branch_probability;
9436 /* Value of 1 is low enough to make no need for probability
9437 to be updated. Later we may run some experiments and see
9438 if unordered values are more frequent in practice. */
9440 bypass_probability = 1;
9442 second_probability = 1;
/* Bypass jump: skips the main jump when the bypass condition holds
   (typically the unordered case); lands on LABEL emitted at the end.  */
9444 if (bypass != NULL_RTX)
9446 label = gen_label_rtx ();
9447 i = emit_jump_insn (gen_rtx_SET
9449 gen_rtx_IF_THEN_ELSE (VOIDmode,
9451 gen_rtx_LABEL_REF (VOIDmode,
9454 if (bypass_probability >= 0)
9456 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9457 GEN_INT (bypass_probability),
/* Main conditional jump.  */
9460 i = emit_jump_insn (gen_rtx_SET
9462 gen_rtx_IF_THEN_ELSE (VOIDmode,
9463 condition, target1, target2)));
9464 if (probability >= 0)
9466 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9467 GEN_INT (probability),
/* Optional second jump for conditions needing two flag tests.  */
9469 if (second != NULL_RTX)
9471 i = emit_jump_insn (gen_rtx_SET
9473 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9475 if (second_probability >= 0)
9477 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9478 GEN_INT (second_probability),
9481 if (label != NULL_RTX)
/* NOTE(review): sampled listing -- interior lines missing; comments only.

   ix86_expand_setcc -- expand a setcc of QImode DEST from comparison
   CODE on the global ix86_compare_op0/op1.  Returns 0 (FAIL) for DImode
   compares it cannot handle (presumably on 32-bit; the guard's second
   clause is not visible here -- verify), otherwise emits the setcc and
   returns 1 (DONE).  When the FP compare needs a second/bypass test the
   result is combined from two QImode flags with AND (bypass path) or
   OR (second-test path).  */
9486 ix86_expand_setcc (code, dest)
9490 rtx ret, tmp, tmpreg;
9491 rtx second_test, bypass_test;
9493 if (GET_MODE (ix86_compare_op0) == DImode
9495 return 0; /* FAIL */
9497 if (GET_MODE (dest) != QImode)
9500 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9501 PUT_MODE (ret, QImode);
9506 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9507 if (bypass_test || second_test)
9509 rtx test = second_test;
9511 rtx tmp2 = gen_reg_rtx (QImode);
/* Bypass test is reversed in place so it can be ANDed with the main
   result (destructively modifies the shared rtx -- intentional here).  */
9518 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9520 PUT_MODE (test, QImode);
9521 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9524 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9526 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9529 return 1; /* DONE */
/* NOTE(review): sampled listing -- interior lines missing; comments only.
   Converts CODE(op0,op1) into an equivalent comparison that is computed
   purely in the carry flag (LTU/GEU against adjusted constants), storing
   the resulting compare rtx through *POP.  Used by adc/sbb-based
   conditional-move and add-with-carry expanders.  */
9532 /* Expand comparison setting or clearing carry flag. Return true when successful
9533 and set pop for the operation. */
9535 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9539 enum machine_mode mode =
9540 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9542 /* Do not handle DImode compares that go trought special path. Also we can't
9543 deal with FP compares yet. This is possible to add. */
9544 if ((mode == DImode && !TARGET_64BIT))
9546 if (FLOAT_MODE_P (mode))
9548 rtx second_test = NULL, bypass_test = NULL;
9549 rtx compare_op, compare_seq;
9551 /* Shortcut: following common codes never translate into carry flag compares. */
9552 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9553 || code == ORDERED || code == UNORDERED)
9556 /* These comparisons require zero flag; swap operands so they won't. */
9557 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9563 code = swap_condition (code);
9566 /* Try to expand the comparsion and verify that we end up with carry flag
9567 based comparsion. This is fails to be true only when we decide to expand
9568 comparsion using arithmetic that is not too common scenario. */
9570 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9571 &second_test, &bypass_test);
9572 compare_seq = get_insns ();
9575 if (second_test || bypass_test)
9577 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9578 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9579 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9581 code = GET_CODE (compare_op);
/* Only a pure carry-flag result (LTU/GEU) is acceptable; otherwise the
   FP path fails (return not visible in this sampled listing).  */
9582 if (code != LTU && code != GEU)
9584 emit_insn (compare_seq);
9588 if (!INTEGRAL_MODE_P (mode))
/* Integer rewrites below map EQ/GT/GTU/LT/LE etc. onto LTU/GEU by
   nudging the constant operand (a==0 -> a<u 1, a>b -> a>=u b+1, ...).  */
9596 /* Convert a==0 into (unsigned)a<1. */
9599 if (op1 != const0_rtx)
9602 code = (code == EQ ? LTU : GEU);
9605 /* Convert a>b into b<a or a>=b-1. */
9608 if (GET_CODE (op1) == CONST_INT)
9610 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9611 /* Bail out on overflow. We still can swap operands but that
9612 would force loading of the constant into register. */
9613 if (op1 == const0_rtx
9614 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9616 code = (code == GTU ? GEU : LTU);
9623 code = (code == GTU ? LTU : GEU);
9627 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9630 if (mode == DImode || op1 != const0_rtx)
9632 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9633 code = (code == LT ? GEU : LTU);
9637 if (mode == DImode || op1 != constm1_rtx)
9639 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9640 code = (code == LE ? GEU : LTU);
/* Hand the rewritten comparison to the generic expander and double
   check a carry-flag compare actually came out.  */
9646 ix86_compare_op0 = op0;
9647 ix86_compare_op1 = op1;
9648 *pop = ix86_expand_compare (code, NULL, NULL);
9649 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
/* NOTE(review): sampled listing -- many interior lines missing; comments
   only, code bytes untouched.

   ix86_expand_int_movcc -- expand an integer conditional move
   operands[0] = (operands[1] comparison) ? operands[2] : operands[3].
   Returns 1 (DONE) when a sequence was emitted, 0 (FAIL) to let the
   caller fall back.  Visible strategies, tried in order:
     1. both arms constant: branchless sbb/setcc arithmetic (adc/sbb -1/0
        masks, lea multiples, and/plus tricks), gated on mode and the
        carry-flag compare helper;
     2. no cmove (or QImode partial-reg stall): setcc + dec + and + add
        sequence when BRANCH_COST justifies it;
     3. one constant arm of 0/-1: recurse to load the constant, then
        mask the variable in with AND/IOR;
     4. otherwise: genuine cmov insn(s), with extra cmovs for FP
        second/bypass tests.  */
9655 ix86_expand_int_movcc (operands)
9658 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9659 rtx compare_seq, compare_op;
9660 rtx second_test, bypass_test;
9661 enum machine_mode mode = GET_MODE (operands[0]);
9662 bool sign_bit_compare_p = false;;
9665 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9666 compare_seq = get_insns ();
9669 compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 (and the constm1 GT/LE variants) can be done with an
   arithmetic right shift of the sign bit.  */
9671 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9672 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9673 sign_bit_compare_p = true;
9675 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9676 HImode insns, we'd be swallowed in word prefix ops. */
9678 if ((mode != HImode || TARGET_FAST_PREFIX)
9679 && (mode != DImode || TARGET_64BIT)
9680 && GET_CODE (operands[2]) == CONST_INT
9681 && GET_CODE (operands[3]) == CONST_INT)
9683 rtx out = operands[0];
9684 HOST_WIDE_INT ct = INTVAL (operands[2]);
9685 HOST_WIDE_INT cf = INTVAL (operands[3]);
9689 /* Sign bit compares are better done using shifts than we do by using
9691 if (sign_bit_compare_p
9692 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9693 ix86_compare_op1, &compare_op))
9695 /* Detect overlap between destination and compare sources. */
9698 if (!sign_bit_compare_p)
9702 compare_code = GET_CODE (compare_op);
9704 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9705 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9708 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9711 /* To simplify rest of code, restrict to the GEU case. */
9712 if (compare_code == LTU)
9714 HOST_WIDE_INT tmp = ct;
9717 compare_code = reverse_condition (compare_code);
9718 code = reverse_condition (code);
9723 PUT_CODE (compare_op,
9724 reverse_condition_maybe_unordered
9725 (GET_CODE (compare_op)));
9727 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9731 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9732 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9733 tmp = gen_reg_rtx (mode);
/* Materialize -1/0 from the carry flag (sbb reg,reg idiom).  */
9736 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9738 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9742 if (code == GT || code == GE)
9743 code = reverse_condition (code);
9746 HOST_WIDE_INT tmp = ct;
9751 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9752 ix86_compare_op1, VOIDmode, 0, -1);
/* Shape the -1/0 mask into {ct, cf}: add, or, not, and/plus tricks
   depending on the constants' relationship (diff = ct - cf).  */
9765 tmp = expand_simple_binop (mode, PLUS,
9767 copy_rtx (tmp), 1, OPTAB_DIRECT);
9778 tmp = expand_simple_binop (mode, IOR,
9780 copy_rtx (tmp), 1, OPTAB_DIRECT);
9782 else if (diff == -1 && ct)
9792 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9794 tmp = expand_simple_binop (mode, PLUS,
9795 copy_rtx (tmp), GEN_INT (cf),
9796 copy_rtx (tmp), 1, OPTAB_DIRECT);
9804 * andl cf - ct, dest
9814 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9817 tmp = expand_simple_binop (mode, AND,
9819 gen_int_mode (cf - ct, mode),
9820 copy_rtx (tmp), 1, OPTAB_DIRECT);
9822 tmp = expand_simple_binop (mode, PLUS,
9823 copy_rtx (tmp), GEN_INT (ct),
9824 copy_rtx (tmp), 1, OPTAB_DIRECT);
9827 if (!rtx_equal_p (tmp, out))
9828 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9830 return 1; /* DONE */
/* Normalize so diff is handled one way: swap ct/cf and reverse the
   condition (unordered-safe reversal for FP compares).  */
9836 tmp = ct, ct = cf, cf = tmp;
9838 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9840 /* We may be reversing unordered compare to normal compare, that
9841 is not valid in general (we may convert non-trapping condition
9842 to trapping one), however on i386 we currently emit all
9843 comparisons unordered. */
9844 compare_code = reverse_condition_maybe_unordered (compare_code);
9845 code = reverse_condition_maybe_unordered (code);
9849 compare_code = reverse_condition (compare_code);
9850 code = reverse_condition (code);
9855 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9856 && GET_CODE (ix86_compare_op1) == CONST_INT)
9858 if (ix86_compare_op1 == const0_rtx
9859 && (code == LT || code == GE))
9860 compare_code = code;
9861 else if (ix86_compare_op1 == constm1_rtx)
9865 else if (code == GT)
9870 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9871 if (compare_code != NIL
9872 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9873 && (cf == -1 || ct == -1))
9875 /* If lea code below could be used, only optimize
9876 if it results in a 2 insn sequence. */
9878 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9879 || diff == 3 || diff == 5 || diff == 9)
9880 || (compare_code == LT && ct == -1)
9881 || (compare_code == GE && cf == -1))
9884 * notl op1 (if necessary)
9892 code = reverse_condition (code);
9895 out = emit_store_flag (out, code, ix86_compare_op0,
9896 ix86_compare_op1, VOIDmode, 0, -1);
9898 out = expand_simple_binop (mode, IOR,
9900 out, 1, OPTAB_DIRECT);
9901 if (out != operands[0])
9902 emit_move_insn (operands[0], out);
9904 return 1; /* DONE */
/* lea-based path: diff is an addressable scale/sum (1,2,3,4,5,8,9),
   so setcc result * diff + cf can be one lea.  */
9909 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9910 || diff == 3 || diff == 5 || diff == 9)
9911 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9912 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9918 * lea cf(dest*(ct-cf)),dest
9922 * This also catches the degenerate setcc-only case.
9928 out = emit_store_flag (out, code, ix86_compare_op0,
9929 ix86_compare_op1, VOIDmode, 0, 1);
9932 /* On x86_64 the lea instruction operates on Pmode, so we need
9933 to get arithmetics done in proper mode to match. */
9935 tmp = copy_rtx (out);
9939 out1 = copy_rtx (out);
9940 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9944 tmp = gen_rtx_PLUS (mode, tmp, out1);
9950 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9953 if (!rtx_equal_p (tmp, out))
9956 out = force_operand (tmp, copy_rtx (out));
9958 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9960 if (!rtx_equal_p (out, operands[0]))
9961 emit_move_insn (operands[0], copy_rtx (out));
9963 return 1; /* DONE */
9967 * General case: Jumpful:
9968 * xorl dest,dest cmpl op1, op2
9969 * cmpl op1, op2 movl ct, dest
9971 * decl dest movl cf, dest
9972 * andl (cf-ct),dest 1:
9977 * This is reasonably steep, but branch mispredict costs are
9978 * high on modern cpus, so consider failing only if optimizing
9982 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9983 && BRANCH_COST >= 2)
9989 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9990 /* We may be reversing unordered compare to normal compare,
9991 that is not valid in general (we may convert non-trapping
9992 condition to trapping one), however on i386 we currently
9993 emit all comparisons unordered. */
9994 code = reverse_condition_maybe_unordered (code);
9997 code = reverse_condition (code);
9998 if (compare_code != NIL)
9999 compare_code = reverse_condition (compare_code);
10003 if (compare_code != NIL)
10005 /* notl op1 (if needed)
10010 For x < 0 (resp. x <= -1) there will be no notl,
10011 so if possible swap the constants to get rid of the
10013 True/false will be -1/0 while code below (store flag
10014 followed by decrement) is 0/-1, so the constants need
10015 to be exchanged once more. */
10017 if (compare_code == GE || !cf)
10019 code = reverse_condition (code);
10024 HOST_WIDE_INT tmp = cf;
10029 out = emit_store_flag (out, code, ix86_compare_op0,
10030 ix86_compare_op1, VOIDmode, 0, -1);
10034 out = emit_store_flag (out, code, ix86_compare_op0,
10035 ix86_compare_op1, VOIDmode, 0, 1);
10037 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10038 copy_rtx (out), 1, OPTAB_DIRECT);
10041 out = expand_simple_binop (mode, AND, copy_rtx (out),
10042 gen_int_mode (cf - ct, mode),
10043 copy_rtx (out), 1, OPTAB_DIRECT);
10045 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10046 copy_rtx (out), 1, OPTAB_DIRECT);
10047 if (!rtx_equal_p (out, operands[0]))
10048 emit_move_insn (operands[0], copy_rtx (out));
10050 return 1; /* DONE */
10054 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10056 /* Try a few things more with specific constants and a variable. */
10059 rtx var, orig_out, out, tmp;
10061 if (BRANCH_COST <= 2)
10062 return 0; /* FAIL */
10064 /* If one of the two operands is an interesting constant, load a
10065 constant with the above and mask it in with a logical operation. */
10067 if (GET_CODE (operands[2]) == CONST_INT)
10070 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10071 operands[3] = constm1_rtx, op = and_optab;
10072 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10073 operands[3] = const0_rtx, op = ior_optab;
10075 return 0; /* FAIL */
10077 else if (GET_CODE (operands[3]) == CONST_INT)
10080 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10081 operands[2] = constm1_rtx, op = and_optab;
10082 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10083 operands[2] = const0_rtx, op = ior_optab;
10085 return 0; /* FAIL */
10088 return 0; /* FAIL */
10090 orig_out = operands[0];
10091 tmp = gen_reg_rtx (mode);
10094 /* Recurse to get the constant loaded. */
10095 if (ix86_expand_int_movcc (operands) == 0)
10096 return 0; /* FAIL */
10098 /* Mask in the interesting variable. */
10099 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10101 if (!rtx_equal_p (out, orig_out))
10102 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10104 return 1; /* DONE */
10108 * For comparison with above,
/* Plain cmov path: force arms into registers where the patterns need
   it, avoiding clobbering operands still read by later cmovs.  */
10118 if (! nonimmediate_operand (operands[2], mode))
10119 operands[2] = force_reg (mode, operands[2]);
10120 if (! nonimmediate_operand (operands[3], mode))
10121 operands[3] = force_reg (mode, operands[3]);
10123 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10125 rtx tmp = gen_reg_rtx (mode);
10126 emit_move_insn (tmp, operands[3]);
10129 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10131 rtx tmp = gen_reg_rtx (mode);
10132 emit_move_insn (tmp, operands[2]);
10136 if (! register_operand (operands[2], VOIDmode)
10138 || ! register_operand (operands[3], VOIDmode)))
10139 operands[2] = force_reg (mode, operands[2]);
10142 && ! register_operand (operands[3], VOIDmode))
10143 operands[3] = force_reg (mode, operands[3]);
10145 emit_insn (compare_seq);
10146 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10147 gen_rtx_IF_THEN_ELSE (mode,
10148 compare_op, operands[2],
/* Extra cmovs fold in the FP bypass/second tests.  */
10151 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10152 gen_rtx_IF_THEN_ELSE (mode,
10154 copy_rtx (operands[3]),
10155 copy_rtx (operands[0]))));
10157 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10158 gen_rtx_IF_THEN_ELSE (mode,
10160 copy_rtx (operands[2]),
10161 copy_rtx (operands[0]))));
10163 return 1; /* DONE */
/* NOTE(review): sampled listing -- interior lines missing; comments only.

   ix86_expand_fp_movcc -- expand a floating point conditional move
   operands[0] = (operands[1]) ? operands[2] : operands[3].  First tries
   SSE min/max (minsf3/maxsf3/mindf3/maxdf3) when the comparison and
   destination share an SF/DF mode and operands cross-match; then an SSE
   masked cmov (sse_movsfcc/sse_movdfcc) after canonicalizing the
   condition for sse_comparison_operator; finally falls back to x87
   fcmov, converting non-fcmov-able conditions via a QImode setcc.  */
10167 ix86_expand_fp_movcc (operands)
10170 enum rtx_code code;
10172 rtx compare_op, second_test, bypass_test;
10174 /* For SF/DFmode conditional moves based on comparisons
10175 in same mode, we may want to use SSE min/max instructions. */
10176 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10177 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10178 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10179 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10180 && (!TARGET_IEEE_FP
10181 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10182 /* We may be called from the post-reload splitter. */
10183 && (!REG_P (operands[0])
10184 || SSE_REG_P (operands[0])
10185 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10187 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10188 code = GET_CODE (operands[1]);
10190 /* See if we have (cross) match between comparison operands and
10191 conditional move operands. */
10192 if (rtx_equal_p (operands[2], op1))
10197 code = reverse_condition_maybe_unordered (code);
10199 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10201 /* Check for min operation. */
10202 if (code == LT || code == UNLE)
10210 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10211 if (memory_operand (op0, VOIDmode))
10212 op0 = force_reg (GET_MODE (operands[0]), op0);
10213 if (GET_MODE (operands[0]) == SFmode)
10214 emit_insn (gen_minsf3 (operands[0], op0, op1));
10216 emit_insn (gen_mindf3 (operands[0], op0, op1));
10219 /* Check for max operation. */
10220 if (code == GT || code == UNGE)
10228 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10229 if (memory_operand (op0, VOIDmode))
10230 op0 = force_reg (GET_MODE (operands[0]), op0);
10231 if (GET_MODE (operands[0]) == SFmode)
10232 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10234 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10238 /* Manage condition to be sse_comparison_operator. In case we are
10239 in non-ieee mode, try to canonicalize the destination operand
10240 to be first in the comparison - this helps reload to avoid extra
10242 if (!sse_comparison_operator (operands[1], VOIDmode)
10243 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10245 rtx tmp = ix86_compare_op0;
10246 ix86_compare_op0 = ix86_compare_op1;
10247 ix86_compare_op1 = tmp;
10248 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10249 VOIDmode, ix86_compare_op0,
10252 /* Similarly try to manage result to be first operand of conditional
10253 move. We also don't support the NE comparison on SSE, so try to
10255 if ((rtx_equal_p (operands[0], operands[3])
10256 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10257 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10259 rtx tmp = operands[2];
10260 operands[2] = operands[3];
10262 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10263 (GET_CODE (operands[1])),
10264 VOIDmode, ix86_compare_op0,
10267 if (GET_MODE (operands[0]) == SFmode)
10268 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10269 operands[2], operands[3],
10270 ix86_compare_op0, ix86_compare_op1));
10272 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10273 operands[2], operands[3],
10274 ix86_compare_op0, ix86_compare_op1));
10278 /* The floating point conditional move instructions don't directly
10279 support conditions resulting from a signed integer comparison. */
10281 code = GET_CODE (operands[1]);
10282 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10284 /* The floating point conditional move instructions don't directly
10285 support signed integer comparisons. */
10287 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Reduce an un-fcmov-able condition to "setcc result != 0".  */
10289 if (second_test != NULL || bypass_test != NULL)
10291 tmp = gen_reg_rtx (QImode);
10292 ix86_expand_setcc (code, tmp);
10294 ix86_compare_op0 = tmp;
10295 ix86_compare_op1 = const0_rtx;
10296 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10298 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10300 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10301 emit_move_insn (tmp, operands[3]);
10304 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10306 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10307 emit_move_insn (tmp, operands[2]);
10311 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10312 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10317 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10318 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10323 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10324 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
/* NOTE(review): sampled listing -- interior lines missing; comments only.
   Expands operands[0] = operands[2] +/- (condition operands[1]) where
   operands[3] is +1 or -1, using adc/sbb on the carry flag produced by
   ix86_expand_carry_flag_compare.  Returns 1 on success; failure
   returns are in sampled-out lines.  */
10332 /* Expand conditional increment or decrement using adb/sbb instructions.
10333 The default case using setcc followed by the conditional move can be
10334 done by generic code. */
10336 ix86_expand_int_addcc (operands)
10339 enum rtx_code code = GET_CODE (operands[1]);
10341 rtx val = const0_rtx;
10342 bool fpcmp = false;
10343 enum machine_mode mode = GET_MODE (operands[0]);
10345 if (operands[3] != const1_rtx
10346 && operands[3] != constm1_rtx)
10348 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10349 ix86_compare_op1, &compare_op))
10351 code = GET_CODE (compare_op);
10353 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10354 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10357 code = ix86_fp_compare_code_to_integer (code);
/* Reverse the condition in place (unordered-safe for FP) so the
   carry sense matches the adc/sbb choice below.  */
10364 PUT_CODE (compare_op,
10365 reverse_condition_maybe_unordered
10366 (GET_CODE (compare_op)));
10368 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10370 PUT_MODE (compare_op, mode);
10372 /* Construct either adc or sbb insn. */
10373 if ((code == LTU) == (operands[3] == constm1_rtx))
10375 switch (GET_MODE (operands[0]))
10378 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10381 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10384 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10387 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10395 switch (GET_MODE (operands[0]))
10398 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10401 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10404 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10407 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10413 return 1; /* DONE */
/* NOTE(review): sampled listing -- interior lines missing; comments only.
   Splits OPERAND of machine mode MODE into word-sized PARTS (SImode on
   32-bit, DImode on 64-bit; 2 or 3 parts).  Handles registers, offsettable
   memory, push operands (returns the same push rtx for each part), and
   CONST_DOUBLE immediates via REAL_VALUE_TO_TARGET_* decomposition.
   Returns the number of parts (return statements are in sampled-out
   lines -- verify against full source).  */
10417 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10418 works for floating pointer parameters and nonoffsetable memories.
10419 For pushes, it returns just stack offsets; the values will be saved
10420 in the right order. Maximally three parts are generated. */
10423 ix86_split_to_parts (operand, parts, mode)
10426 enum machine_mode mode;
10431 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10433 size = (GET_MODE_SIZE (mode) + 4) / 8;
10435 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10437 if (size < 2 || size > 3)
10440 /* Optimize constant pool reference to immediates. This is used by fp
10441 moves, that force all constants to memory to allow combining. */
10442 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10444 rtx tmp = maybe_get_pool_constant (operand);
10449 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10451 /* The only non-offsetable memories we handle are pushes. */
10452 if (! push_operand (operand, VOIDmode))
10455 operand = copy_rtx (operand);
10456 PUT_MODE (operand, Pmode);
10457 parts[0] = parts[1] = parts[2] = operand;
10459 else if (!TARGET_64BIT)
10461 if (mode == DImode)
10462 split_di (&operand, 1, &parts[0], &parts[1]);
10465 if (REG_P (operand))
10467 if (!reload_completed)
10469 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10470 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10472 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10474 else if (offsettable_memref_p (operand))
10476 operand = adjust_address (operand, SImode, 0);
10477 parts[0] = operand;
10478 parts[1] = adjust_address (operand, SImode, 4);
10480 parts[2] = adjust_address (operand, SImode, 8);
10482 else if (GET_CODE (operand) == CONST_DOUBLE)
10487 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10492 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10493 parts[2] = gen_int_mode (l[2], SImode);
10496 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10501 parts[1] = gen_int_mode (l[1], SImode);
10502 parts[0] = gen_int_mode (l[0], SImode);
/* 64-bit target: parts are DImode (TImode splits in two; XF/TF keeps a
   DImode low part and SImode high part).  */
10510 if (mode == TImode)
10511 split_ti (&operand, 1, &parts[0], &parts[1]);
10512 if (mode == XFmode || mode == TFmode)
10514 if (REG_P (operand))
10516 if (!reload_completed)
10518 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10519 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10521 else if (offsettable_memref_p (operand))
10523 operand = adjust_address (operand, DImode, 0);
10524 parts[0] = operand;
10525 parts[1] = adjust_address (operand, SImode, 8);
10527 else if (GET_CODE (operand) == CONST_DOUBLE)
10532 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10533 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10534 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10535 if (HOST_BITS_PER_WIDE_INT >= 64)
10538 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10539 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10542 parts[0] = immed_double_const (l[0], l[1], DImode);
10543 parts[1] = gen_int_mode (l[2], SImode);
/* NOTE(review): sampled listing -- interior lines missing; comments only.
   Splits a multi-word move (DI/DF/XF/TF) into word moves via
   ix86_split_to_parts, handling pushes, source/destination overlap
   (reordering or lea rewrite), and 64-bit SImode push promotion.  The
   operands[2..7] slots are filled with the parts in a safe copy order
   before the final emit_move_insn calls.  */
10553 /* Emit insns to perform a move or push of DI, DF, and XF values.
10554 Return false when normal moves are needed; true when all required
10555 insns have been emitted. Operands 2-4 contain the input values
10556 int the correct order; operands 5-7 contain the output values. */
10559 ix86_split_long_move (operands)
10565 int collisions = 0;
10566 enum machine_mode mode = GET_MODE (operands[0]);
10568 /* The DFmode expanders may ask us to move double.
10569 For 64bit target this is single move. By hiding the fact
10570 here we simplify i386.md splitters. */
10571 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10573 /* Optimize constant pool reference to immediates. This is used by
10574 fp moves, that force all constants to memory to allow combining. */
10576 if (GET_CODE (operands[1]) == MEM
10577 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10578 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10579 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10580 if (push_operand (operands[0], VOIDmode))
10582 operands[0] = copy_rtx (operands[0]);
10583 PUT_MODE (operands[0], Pmode);
10586 operands[0] = gen_lowpart (DImode, operands[0]);
10587 operands[1] = gen_lowpart (DImode, operands[1]);
10588 emit_move_insn (operands[0], operands[1]);
10592 /* The only non-offsettable memory we handle is push. */
10593 if (push_operand (operands[0], VOIDmode))
10595 else if (GET_CODE (operands[0]) == MEM
10596 && ! offsettable_memref_p (operands[0]))
10599 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10600 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10602 /* When emitting push, take care for source operands on the stack. */
10603 if (push && GET_CODE (operands[1]) == MEM
10604 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10607 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10608 XEXP (part[1][2], 0));
10609 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10610 XEXP (part[1][1], 0));
10613 /* We need to do copy in the right order in case an address register
10614 of the source overlaps the destination. */
10615 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10617 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10619 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10622 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10625 /* Collision in the middle part can be handled by reordering. */
10626 if (collisions == 1 && nparts == 3
10627 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10630 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10631 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10634 /* If there are more collisions, we can't handle it by reordering.
10635 Do an lea to the last part and use only one colliding move. */
10636 else if (collisions > 1)
10639 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10640 XEXP (part[1][0], 0)));
10641 part[1][0] = change_address (part[1][0],
10642 TARGET_64BIT ? DImode : SImode,
10643 part[0][nparts - 1]);
10644 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10646 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10656 /* We use only first 12 bytes of TFmode value, but for pushing we
10657 are required to adjust stack as if we were pushing real 16byte
10659 if (mode == TFmode && !TARGET_64BIT)
10660 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10662 emit_move_insn (part[0][2], part[1][2]);
10667 /* In 64bit mode we don't have 32bit push available. In case this is
10668 register, it is OK - we will just use larger counterpart. We also
10669 retype memory - these comes from attempt to avoid REX prefix on
10670 moving of second half of TFmode value. */
10671 if (GET_MODE (part[1][1]) == SImode)
10673 if (GET_CODE (part[1][1]) == MEM)
10674 part[1][1] = adjust_address (part[1][1], DImode, 0);
10675 else if (REG_P (part[1][1]))
10676 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10679 if (GET_MODE (part[1][0]) == SImode)
10680 part[1][0] = part[1][1];
10683 emit_move_insn (part[0][1], part[1][1]);
10684 emit_move_insn (part[0][0], part[1][0]);
10688 /* Choose correct order to not overwrite the source before it is copied. */
10689 if ((REG_P (part[0][0])
10690 && REG_P (part[1][1])
10691 && (REGNO (part[0][0]) == REGNO (part[1][1])
10693 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10695 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* High-to-low copy order (destination low word would clobber source).  */
10699 operands[2] = part[0][2];
10700 operands[3] = part[0][1];
10701 operands[4] = part[0][0];
10702 operands[5] = part[1][2];
10703 operands[6] = part[1][1];
10704 operands[7] = part[1][0];
10708 operands[2] = part[0][1];
10709 operands[3] = part[0][0];
10710 operands[5] = part[1][1];
10711 operands[6] = part[1][0];
/* Low-to-high copy order (no overlap hazard).  */
10718 operands[2] = part[0][0];
10719 operands[3] = part[0][1];
10720 operands[4] = part[0][2];
10721 operands[5] = part[1][0];
10722 operands[6] = part[1][1];
10723 operands[7] = part[1][2];
10727 operands[2] = part[0][0];
10728 operands[3] = part[0][1];
10729 operands[5] = part[1][0];
10730 operands[6] = part[1][1];
10733 emit_move_insn (operands[2], operands[5]);
10734 emit_move_insn (operands[3], operands[6]);
10736 emit_move_insn (operands[4], operands[7]);
/* Split a DImode (64-bit) left shift into SImode instructions for ia32.
   operands[0] = destination, operands[1] = source, operands[2] = shift
   count; SCRATCH is an optional SImode scratch register usable after
   reload.  NOTE(review): this excerpt is incomplete -- the original
   line numbers skip, so some statements and braces are missing.  */
10742 ix86_split_ashldi (operands, scratch)
10743 rtx *operands, scratch;
10745 rtx low[2], high[2];
/* Constant shift count: emit the minimal fixed sequence.  */
10748 if (GET_CODE (operands[2]) == CONST_INT)
10750 split_di (operands, 2, low, high);
10751 count = INTVAL (operands[2]) & 63;
/* Count >= 32: result high word is the source low word shifted by
   (count - 32); result low word becomes zero.  */
10755 emit_move_insn (high[0], low[1]);
10756 emit_move_insn (low[0], const0_rtx);
10759 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* Count < 32: shld shifts bits from the low word into the high word.  */
10763 if (!rtx_equal_p (operands[0], operands[1]))
10764 emit_move_insn (operands[0], operands[1]);
10765 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10766 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable shift count: shift both words, then fix up the >= 32 case.  */
10771 if (!rtx_equal_p (operands[0], operands[1]))
10772 emit_move_insn (operands[0], operands[1]);
10774 split_di (operands, 1, low, high);
10776 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10777 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
/* With cmov the fixup is branch-free, using a zeroed scratch register;
   otherwise fall back to the branching adjustment pattern.  */
10779 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10781 if (! no_new_pseudos)
10782 scratch = force_reg (SImode, const0_rtx);
10784 emit_move_insn (scratch, const0_rtx);
10786 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10790 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic (sign-preserving) right shift into SImode
   instructions for ia32.  Same operand layout as ix86_split_ashldi.
   NOTE(review): excerpt has gaps (original line numbers skip).  */
10795 ix86_split_ashrdi (operands, scratch)
10796 rtx *operands, scratch;
10798 rtx low[2], high[2];
10801 if (GET_CODE (operands[2]) == CONST_INT)
10803 split_di (operands, 2, low, high);
10804 count = INTVAL (operands[2]) & 63;
/* Count >= 32: low word is the source high word shifted; high word is
   filled with copies of the sign bit (shift by 31).  */
10808 emit_move_insn (low[0], high[1]);
/* Before reload we may sign-extend from low[0] directly; after reload
   the same register pairing forces a copy into high[0] first.  */
10810 if (! reload_completed)
10811 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10814 emit_move_insn (high[0], low[0]);
10815 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10819 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Count < 32: shrd shifts bits from the high word into the low word.  */
10823 if (!rtx_equal_p (operands[0], operands[1]))
10824 emit_move_insn (operands[0], operands[1]);
10825 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10826 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shift both words, then adjust for counts >= 32.  */
10831 if (!rtx_equal_p (operands[0], operands[1]))
10832 emit_move_insn (operands[0], operands[1]);
10834 split_di (operands, 1, low, high);
10836 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10837 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
/* Branch-free fixup needs the sign mask (high >> 31) in SCRATCH.  */
10839 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10841 if (! no_new_pseudos)
10842 scratch = gen_reg_rtx (SImode);
10843 emit_move_insn (scratch, high[0]);
10844 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10845 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10849 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a DImode logical (zero-filling) right shift into SImode
   instructions for ia32.  Same operand layout as ix86_split_ashldi.
   NOTE(review): excerpt has gaps (original line numbers skip).  */
10854 ix86_split_lshrdi (operands, scratch)
10855 rtx *operands, scratch;
10857 rtx low[2], high[2];
10860 if (GET_CODE (operands[2]) == CONST_INT)
10862 split_di (operands, 2, low, high);
10863 count = INTVAL (operands[2]) & 63;
/* Count >= 32: low word is the source high word shifted by
   (count - 32); high word becomes zero.  */
10867 emit_move_insn (low[0], high[1]);
10868 emit_move_insn (high[0], const0_rtx);
10871 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Count < 32: shrd shifts bits from the high word into the low word.  */
10875 if (!rtx_equal_p (operands[0], operands[1]))
10876 emit_move_insn (operands[0], operands[1]);
10877 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10878 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shift both words, then adjust for counts >= 32.  */
10883 if (!rtx_equal_p (operands[0], operands[1]))
10884 emit_move_insn (operands[0], operands[1]);
10886 split_di (operands, 1, low, high);
10888 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10889 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10891 /* Heh. By reversing the arguments, we can reuse this pattern. */
10892 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10894 if (! no_new_pseudos)
10895 scratch = force_reg (SImode, const0_rtx);
10897 emit_move_insn (scratch, const0_rtx);
10899 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10903 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10907 /* Helper function for the string operations below. Test whether VARIABLE
10908 is aligned with respect to the VALUE bits, i.e. whether
   (VARIABLE & VALUE) == 0, and if so jump to the returned label
   (so callers can skip the corresponding fix-up move). */
10910 ix86_expand_aligntest (variable, value)
10914 rtx label = gen_label_rtx ();
10915 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* AND in VARIABLE's own mode (DImode on 64-bit counters).  */
10916 if (GET_MODE (variable) == DImode)
10917 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10919 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
/* Jump when the masked bits are all zero.  NOTE(review): the trailing
   arguments of this call (including the label) are missing from the
   excerpt.  */
10920 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10925 /* Adjust COUNTER by the VALUE, i.e. emit COUNTREG -= VALUE in
   COUNTREG's own mode. */
10927 ix86_adjust_counter (countreg, value)
10929 HOST_WIDE_INT value;
/* Subtraction is expressed as adding the negated value.  */
10931 if (GET_MODE (countreg) == DImode)
10932 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10934 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10937 /* Zero extend possibly SImode EXP to Pmode register.  Constants
   (VOIDmode) and values already in Pmode are just copied into a fresh
   register; an SImode value gets an explicit zero extension. */
10939 ix86_zero_extend_to_Pmode (exp)
10943 if (GET_MODE (exp) == VOIDmode)
10944 return force_reg (Pmode, exp);
10945 if (GET_MODE (exp) == Pmode)
10946 return copy_to_mode_reg (Pmode, exp);
10947 r = gen_reg_rtx (Pmode);
10948 emit_insn (gen_zero_extendsidi2 (r, exp));
10952 /* Expand string move (memcpy) operation. Use i386 string operations when
10953 profitable. expand_clrstr contains similar code.
   DST/SRC are MEMs; COUNT_EXP is the byte count; ALIGN_EXP the known
   alignment.  Returns nonzero on success (per the FAIL convention of
   the movstr expanders -- the return statements are among the lines
   missing from this excerpt).  NOTE(review): the listing has gaps;
   original line numbers skip throughout. */
10955 ix86_expand_movstr (dst, src, count_exp, align_exp)
10956 rtx dst, src, count_exp, align_exp;
10958 rtx srcreg, destreg, countreg;
10959 enum machine_mode counter_mode;
10960 HOST_WIDE_INT align = 0;
10961 unsigned HOST_WIDE_INT count = 0;
10964 if (GET_CODE (align_exp) == CONST_INT)
10965 align = INTVAL (align_exp);
10967 /* Can't use any of this if the user has appropriated esi or edi. */
10968 if (global_regs[4] || global_regs[5])
10971 /* This simple hack avoids all inlining code and simplifies code below. */
10972 if (!TARGET_ALIGN_STRINGOPS)
10975 if (GET_CODE (count_exp) == CONST_INT)
10977 count = INTVAL (count_exp);
10978 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10982 /* Figure out proper mode for counter. For 32bits it is always SImode,
10983 for 64bits use SImode when possible, otherwise DImode.
10984 Set count to number of bytes copied when known at compile time. */
10985 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10986 || x86_64_zero_extended_value (count_exp))
10987 counter_mode = SImode;
10989 counter_mode = DImode;
10993 if (counter_mode != SImode && counter_mode != DImode)
/* Load the (post-increment) addresses into registers for the string
   insns; esi/edi are imposed by the patterns themselves.  */
10996 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10997 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10999 emit_insn (gen_cld ());
11001 /* When optimizing for size emit simple rep ; movsb instruction for
11002 counts not divisible by 4. */
11004 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11006 countreg = ix86_zero_extend_to_Pmode (count_exp);
11008 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
11009 destreg, srcreg, countreg));
11011 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
11012 destreg, srcreg, countreg));
11015 /* For constant aligned (or small unaligned) copies use rep movsl
11016 followed by code copying the rest. For PentiumPro ensure 8 byte
11017 alignment to allow rep movsl acceleration. */
11019 else if (count != 0
11021 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11022 || optimize_size || count < (unsigned int) 64))
/* SIZE is the width of the bulk move: 8 bytes (rep movsq) on 64-bit
   when not optimizing for size, 4 bytes (rep movsl) otherwise.  */
11024 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11025 if (count & ~(size - 1))
11027 countreg = copy_to_mode_reg (counter_mode,
11028 GEN_INT ((count >> (size == 4 ? 2 : 3))
11029 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11030 countreg = ix86_zero_extend_to_Pmode (countreg);
11034 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
11035 destreg, srcreg, countreg));
11037 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
11038 destreg, srcreg, countreg));
11041 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
11042 destreg, srcreg, countreg));
/* Copy the remaining 0..size-1 tail bytes with single string moves.  */
11044 if (size == 8 && (count & 0x04))
11045 emit_insn (gen_strmovsi (destreg, srcreg));
11047 emit_insn (gen_strmovhi (destreg, srcreg));
11049 emit_insn (gen_strmovqi (destreg, srcreg));
11051 /* The generic code based on the glibc implementation:
11052 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11053 allowing accelerated copying there)
11054 - copy the data using rep movsl
11055 - copy the rest. */
11060 int desired_alignment = (TARGET_PENTIUMPRO
11061 && (count == 0 || count >= (unsigned int) 260)
11062 ? 8 : UNITS_PER_WORD);
11064 /* In case we don't know anything about the alignment, default to
11065 library version, since it is usually equally fast and result in
11068 Also emit call when we know that the count is large and call overhead
11069 will not be important. */
11070 if (!TARGET_INLINE_ALL_STRINGOPS
11071 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11077 if (TARGET_SINGLE_STRINGOP)
11078 emit_insn (gen_cld ());
11080 countreg2 = gen_reg_rtx (Pmode);
11081 countreg = copy_to_mode_reg (counter_mode, count_exp);
11083 /* We don't use loops to align destination and to copy parts smaller
11084 than 4 bytes, because gcc is able to optimize such code better (in
11085 the case the destination or the count really is aligned, gcc is often
11086 able to predict the branches) and also it is friendlier to the
11087 hardware branch prediction.
11089 Using loops is beneficial for generic case, because we can
11090 handle small counts using the loops. Many CPUs (such as Athlon)
11091 have large REP prefix setup costs.
11093 This is quite costly. Maybe we can revisit this decision later or
11094 add some customizability to this code. */
/* If the count is unknown, skip the whole alignment prologue when it
   is too small to need aligning.  */
11096 if (count == 0 && align < desired_alignment)
11098 label = gen_label_rtx ();
11099 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11100 LEU, 0, counter_mode, 1, label);
/* Alignment prologue: move 1, 2, then 4 bytes as needed so DESTREG
   reaches DESIRED_ALIGNMENT; each test's label skips its move.  */
11104 rtx label = ix86_expand_aligntest (destreg, 1);
11105 emit_insn (gen_strmovqi (destreg, srcreg));
11106 ix86_adjust_counter (countreg, 1);
11107 emit_label (label);
11108 LABEL_NUSES (label) = 1;
11112 rtx label = ix86_expand_aligntest (destreg, 2);
11113 emit_insn (gen_strmovhi (destreg, srcreg));
11114 ix86_adjust_counter (countreg, 2);
11115 emit_label (label);
11116 LABEL_NUSES (label) = 1;
11118 if (align <= 4 && desired_alignment > 4)
11120 rtx label = ix86_expand_aligntest (destreg, 4);
11121 emit_insn (gen_strmovsi (destreg, srcreg));
11122 ix86_adjust_counter (countreg, 4);
11123 emit_label (label);
11124 LABEL_NUSES (label) = 1;
11127 if (label && desired_alignment > 4 && !TARGET_64BIT)
11129 emit_label (label);
11130 LABEL_NUSES (label) = 1;
/* Main bulk copy: rep movsq (64-bit) or rep movsl, count scaled down
   accordingly.  */
11133 if (!TARGET_SINGLE_STRINGOP)
11134 emit_insn (gen_cld ());
11137 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11139 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11140 destreg, srcreg, countreg2));
11144 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11145 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11146 destreg, srcreg, countreg2));
11151 emit_label (label);
11152 LABEL_NUSES (label) = 1;
/* Epilogue: copy the remaining tail (4, 2, then 1 bytes), either
   unconditionally when the constant count says so, or guarded by
   runtime tests of the count's low bits.  */
11154 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11155 emit_insn (gen_strmovsi (destreg, srcreg));
11156 if ((align <= 4 || count == 0) && TARGET_64BIT)
11158 rtx label = ix86_expand_aligntest (countreg, 4);
11159 emit_insn (gen_strmovsi (destreg, srcreg));
11160 emit_label (label);
11161 LABEL_NUSES (label) = 1;
11163 if (align > 2 && count != 0 && (count & 2))
11164 emit_insn (gen_strmovhi (destreg, srcreg));
11165 if (align <= 2 || count == 0)
11167 rtx label = ix86_expand_aligntest (countreg, 2);
11168 emit_insn (gen_strmovhi (destreg, srcreg));
11169 emit_label (label);
11170 LABEL_NUSES (label) = 1;
11172 if (align > 1 && count != 0 && (count & 1))
11173 emit_insn (gen_strmovqi (destreg, srcreg));
11174 if (align <= 1 || count == 0)
11176 rtx label = ix86_expand_aligntest (countreg, 1);
11177 emit_insn (gen_strmovqi (destreg, srcreg));
11178 emit_label (label);
11179 LABEL_NUSES (label) = 1;
/* Attach memory attributes of DST/SRC to the emitted string insns.  */
11183 insns = get_insns ();
11186 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11191 /* Expand string clear operation (bzero). Use i386 string operations when
11192 profitable. expand_movstr contains similar code.
   SRC is the destination MEM (despite the name), COUNT_EXP the byte
   count, ALIGN_EXP the known alignment.  NOTE(review): the listing has
   gaps; original line numbers skip throughout, so return statements and
   some braces are missing from this excerpt. */
11194 ix86_expand_clrstr (src, count_exp, align_exp)
11195 rtx src, count_exp, align_exp;
11197 rtx destreg, zeroreg, countreg;
11198 enum machine_mode counter_mode;
11199 HOST_WIDE_INT align = 0;
11200 unsigned HOST_WIDE_INT count = 0;
11202 if (GET_CODE (align_exp) == CONST_INT)
11203 align = INTVAL (align_exp);
11205 /* Can't use any of this if the user has appropriated esi. */
11206 if (global_regs[4])
11209 /* This simple hack avoids all inlining code and simplifies code below. */
11210 if (!TARGET_ALIGN_STRINGOPS)
11213 if (GET_CODE (count_exp) == CONST_INT)
11215 count = INTVAL (count_exp);
11216 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11219 /* Figure out proper mode for counter. For 32bits it is always SImode,
11220 for 64bits use SImode when possible, otherwise DImode.
11221 Set count to number of bytes copied when known at compile time. */
11222 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11223 || x86_64_zero_extended_value (count_exp))
11224 counter_mode = SImode;
11226 counter_mode = DImode;
11228 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11230 emit_insn (gen_cld ());
11232 /* When optimizing for size emit simple rep ; movsb instruction for
11233 counts not divisible by 4. */
11235 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11237 countreg = ix86_zero_extend_to_Pmode (count_exp);
11238 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11240 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11241 destreg, countreg));
11243 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11244 destreg, countreg));
/* Constant aligned (or small unaligned) clears: rep stosl/stosq plus
   tail stores.  */
11246 else if (count != 0
11248 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11249 || optimize_size || count < (unsigned int) 64))
11251 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
/* ZEROREG holds the zero value in the bulk-store width.  */
11252 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11253 if (count & ~(size - 1))
11255 countreg = copy_to_mode_reg (counter_mode,
11256 GEN_INT ((count >> (size == 4 ? 2 : 3))
11257 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11258 countreg = ix86_zero_extend_to_Pmode (countreg);
11262 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11263 destreg, countreg));
11265 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11266 destreg, countreg));
11269 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11270 destreg, countreg));
/* Store the remaining tail (4, 2, 1 bytes) using narrowing SUBREGs of
   the zero register.  */
11272 if (size == 8 && (count & 0x04))
11273 emit_insn (gen_strsetsi (destreg,
11274 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11276 emit_insn (gen_strsethi (destreg,
11277 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11279 emit_insn (gen_strsetqi (destreg,
11280 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11286 /* Compute desired alignment of the string operation. */
11287 int desired_alignment = (TARGET_PENTIUMPRO
11288 && (count == 0 || count >= (unsigned int) 260)
11289 ? 8 : UNITS_PER_WORD);
11291 /* In case we don't know anything about the alignment, default to
11292 library version, since it is usually equally fast and result in
11295 Also emit call when we know that the count is large and call overhead
11296 will not be important. */
11297 if (!TARGET_INLINE_ALL_STRINGOPS
11298 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11301 if (TARGET_SINGLE_STRINGOP)
11302 emit_insn (gen_cld ());
11304 countreg2 = gen_reg_rtx (Pmode);
11305 countreg = copy_to_mode_reg (counter_mode, count_exp);
11306 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
/* If the count is unknown, skip the alignment prologue when the count
   is too small to need it.  */
11308 if (count == 0 && align < desired_alignment)
11310 label = gen_label_rtx ();
11311 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11312 LEU, 0, counter_mode, 1, label);
/* Alignment prologue: store 1, 2, then 4 bytes as needed.  */
11316 rtx label = ix86_expand_aligntest (destreg, 1);
11317 emit_insn (gen_strsetqi (destreg,
11318 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11319 ix86_adjust_counter (countreg, 1);
11320 emit_label (label);
11321 LABEL_NUSES (label) = 1;
11325 rtx label = ix86_expand_aligntest (destreg, 2);
11326 emit_insn (gen_strsethi (destreg,
11327 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11328 ix86_adjust_counter (countreg, 2);
11329 emit_label (label);
11330 LABEL_NUSES (label) = 1;
11332 if (align <= 4 && desired_alignment > 4)
11334 rtx label = ix86_expand_aligntest (destreg, 4);
11335 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11336 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11338 ix86_adjust_counter (countreg, 4);
11339 emit_label (label);
11340 LABEL_NUSES (label) = 1;
11343 if (label && desired_alignment > 4 && !TARGET_64BIT)
11345 emit_label (label);
11346 LABEL_NUSES (label) = 1;
/* Main bulk clear with rep stosq / rep stosl, count scaled down.  */
11350 if (!TARGET_SINGLE_STRINGOP)
11351 emit_insn (gen_cld ());
11354 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11356 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11357 destreg, countreg2));
11361 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11362 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11363 destreg, countreg2));
11367 emit_label (label);
11368 LABEL_NUSES (label) = 1;
/* Epilogue: clear the remaining tail, unconditionally when the constant
   count's low bits say so, otherwise under runtime tests.  */
11371 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11372 emit_insn (gen_strsetsi (destreg,
11373 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11374 if (TARGET_64BIT && (align <= 4 || count == 0))
11376 rtx label = ix86_expand_aligntest (countreg, 4);
11377 emit_insn (gen_strsetsi (destreg,
11378 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11379 emit_label (label);
11380 LABEL_NUSES (label) = 1;
11382 if (align > 2 && count != 0 && (count & 2))
11383 emit_insn (gen_strsethi (destreg,
11384 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11385 if (align <= 2 || count == 0)
11387 rtx label = ix86_expand_aligntest (countreg, 2);
11388 emit_insn (gen_strsethi (destreg,
11389 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11390 emit_label (label);
11391 LABEL_NUSES (label) = 1;
11393 if (align > 1 && count != 0 && (count & 1))
11394 emit_insn (gen_strsetqi (destreg,
11395 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11396 if (align <= 1 || count == 0)
11398 rtx label = ix86_expand_aligntest (countreg, 1);
11399 emit_insn (gen_strsetqi (destreg,
11400 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11401 emit_label (label);
11402 LABEL_NUSES (label) = 1;
11407 /* Expand strlen.  OUT receives the length; SRC is the string MEM;
   EOSCHAR the end-of-string character (const0_rtx for plain strlen);
   ALIGN the known alignment.  Two strategies: an unrolled scan
   (ix86_expand_strlensi_unroll_1) when searching for NUL with good
   optimization settings, otherwise repnz scasb via the strlenqi
   patterns.  NOTE(review): excerpt has gaps (line numbers skip). */
11409 ix86_expand_strlen (out, src, eoschar, align)
11410 rtx out, src, eoschar, align;
11412 rtx addr, scratch1, scratch2, scratch3, scratch4;
11414 /* The generic case of strlen expander is long. Avoid its
11415 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11417 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11418 && !TARGET_INLINE_ALL_STRINGOPS
11420 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11423 addr = force_reg (Pmode, XEXP (src, 0));
11424 scratch1 = gen_reg_rtx (Pmode);
/* Unrolled variant: scan word-at-a-time for the terminating NUL.  */
11426 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11429 /* Well it seems that some optimizer does not combine a call like
11430 foo(strlen(bar), strlen(bar));
11431 when the move and the subtraction is done here. It does calculate
11432 the length just once when these instructions are done inside of
11433 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11434 often used and I use one fewer register for the lifetime of
11435 output_strlen_unroll() this is better. */
11437 emit_move_insn (out, addr);
11439 ix86_expand_strlensi_unroll_1 (out, align);
11441 /* strlensi_unroll_1 returns the address of the zero at the end of
11442 the string, like memchr(), so compute the length by subtracting
11443 the start address. */
11445 emit_insn (gen_subdi3 (out, out, addr));
11447 emit_insn (gen_subsi3 (out, out, addr));
/* scasb variant: scratch4 = -1 is the (unbounded) repeat count;
   scratch3 starts at the string address and ends one past the match;
   the length is then ~scratch1 - 1 (one's complement of the remaining
   count, minus one).  */
11451 scratch2 = gen_reg_rtx (Pmode);
11452 scratch3 = gen_reg_rtx (Pmode);
11453 scratch4 = force_reg (Pmode, constm1_rtx);
11455 emit_move_insn (scratch3, addr);
11456 eoschar = force_reg (QImode, eoschar);
11458 emit_insn (gen_cld ());
11461 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11462 align, scratch4, scratch3));
11463 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11464 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11468 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11469 align, scratch4, scratch3));
11470 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11471 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11477 /* Expand the appropriate insns for doing strlen if not just doing
11480 out = result, initialized with the start address
11481 align_rtx = alignment of the address.
11482 scratch = scratch register, initialized with the startaddress when
11483 not aligned, otherwise undefined
11485 This is just the body. It needs the initialisations mentioned above and
11486 some address computing at the end. These things are done in i386.md.
   On return OUT holds the address of the terminating zero byte (like
   memchr), not the length.  NOTE(review): excerpt has gaps; original
   line numbers skip, so some labels/branches are missing here. */
11489 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11490 rtx out, align_rtx;
11494 rtx align_2_label = NULL_RTX;
11495 rtx align_3_label = NULL_RTX;
11496 rtx align_4_label = gen_label_rtx ();
11497 rtx end_0_label = gen_label_rtx ();
11499 rtx tmpreg = gen_reg_rtx (SImode);
11500 rtx scratch = gen_reg_rtx (SImode);
11504 if (GET_CODE (align_rtx) == CONST_INT)
11505 align = INTVAL (align_rtx);
11507 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11509 /* Is there a known alignment and is it less than 4? */
11512 rtx scratch1 = gen_reg_rtx (Pmode);
11513 emit_move_insn (scratch1, out);
11514 /* Is there a known alignment and is it not 2? */
11517 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11518 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11520 /* Leave just the 3 lower bits. */
11521 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11522 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (address & 3): 0 -> already aligned, 2 -> two bytes to
   check, 3 -> one byte, else fall through to the 1-byte checks.  */
11524 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11525 Pmode, 1, align_4_label);
11526 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11527 Pmode, 1, align_2_label);
11528 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11529 Pmode, 1, align_3_label);
11533 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11534 check if is aligned to 4 - byte. */
11536 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11537 NULL_RTX, 0, OPTAB_WIDEN);
11539 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11540 Pmode, 1, align_4_label);
11543 mem = gen_rtx_MEM (QImode, out);
11545 /* Now compare the bytes. */
11547 /* Compare the first n unaligned byte on a byte per byte basis. */
11548 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11549 QImode, 1, end_0_label);
11551 /* Increment the address. */
11553 emit_insn (gen_adddi3 (out, out, const1_rtx));
11555 emit_insn (gen_addsi3 (out, out, const1_rtx));
11557 /* Not needed with an alignment of 2 */
11560 emit_label (align_2_label);
11562 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11566 emit_insn (gen_adddi3 (out, out, const1_rtx));
11568 emit_insn (gen_addsi3 (out, out, const1_rtx));
11570 emit_label (align_3_label);
11573 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11577 emit_insn (gen_adddi3 (out, out, const1_rtx));
11579 emit_insn (gen_addsi3 (out, out, const1_rtx));
11582 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11583 align this loop. It gives only huge programs, but does not help to
11585 emit_label (align_4_label);
11587 mem = gen_rtx_MEM (SImode, out);
11588 emit_move_insn (scratch, mem);
11590 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11592 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11594 /* This formula yields a nonzero result iff one of the bytes is zero.
11595 This saves three branches inside loop and many cycles. */
/* The classic haszero trick: (x - 0x01010101) & ~x & 0x80808080.  */
11597 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11598 emit_insn (gen_one_cmplsi2 (scratch, scratch))
11599 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11600 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11601 gen_int_mode (0x80808080, SImode)));
11602 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found somewhere in the word: locate which of the
   four bytes it is.  With cmov we do it branch-free ...  */
11607 rtx reg = gen_reg_rtx (SImode);
11608 rtx reg2 = gen_reg_rtx (Pmode);
11609 emit_move_insn (reg, tmpreg);
11610 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11612 /* If zero is not in the first two bytes, move two bytes forward. */
11613 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11614 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11615 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11616 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11617 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11620 /* Emit lea manually to avoid clobbering of flags. */
11621 emit_insn (gen_rtx_SET (SImode, reg2,
11622 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11624 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11625 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11626 emit_insn (gen_rtx_SET (VOIDmode, out,
11627 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* ... without cmov, branch around the two-byte adjustment instead.  */
11634 rtx end_2_label = gen_label_rtx ();
11635 /* Is zero in the first two bytes? */
11637 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11638 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11639 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11640 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11641 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11643 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11644 JUMP_LABEL (tmp) = end_2_label;
11646 /* Not in the first two. Move two bytes forward. */
11647 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11649 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11651 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11653 emit_label (end_2_label);
11657 /* Avoid branch in fixing the byte. */
/* The carry from tmpreg+tmpreg tells whether the zero is in the first
   byte of the remaining pair; subtract-with-carry fixes OUT to point
   exactly at the zero byte.  */
11658 tmpreg = gen_lowpart (QImode, tmpreg);
11659 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11660 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11662 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11664 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11666 emit_label (end_0_label);
/* Emit a call instruction.  RETVAL (may be 0) receives the value;
   FNADDR is a MEM wrapping the callee address; CALLARG1/CALLARG2 are
   extra pattern operands; POP is the byte count the callee pops
   (callee-pop conventions); SIBCALL is nonzero for tail calls.
   NOTE(review): excerpt has gaps (line numbers skip); the #if
   TARGET_MACHO opener matching the #endif below is among the
   missing lines. */
11670 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11671 rtx retval, fnaddr, callarg1, callarg2, pop;
11674 rtx use = NULL, call;
11676 if (pop == const0_rtx)
11678 if (TARGET_64BIT && pop)
11682 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11683 fnaddr = machopic_indirect_call_target (fnaddr);
11685 /* Static functions and indirect calls don't need the pic register. */
11686 if (! TARGET_64BIT && flag_pic
11687 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11688 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11689 use_reg (&use, pic_offset_table_rtx)
/* x86-64 varargs ABI: AL carries the number of SSE registers used.  */
11691 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11693 rtx al = gen_rtx_REG (QImode, 0);
11694 emit_move_insn (al, callarg2);
11695 use_reg (&use, al);
11697 #endif /* TARGET_MACHO */
/* Force an invalid call target into a register-indirect form.  */
11699 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11701 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11702 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* x86-64 sibcalls through a register must use a call-clobbered,
   non-argument register (hard reg 40 = r11).  */
11704 if (sibcall && TARGET_64BIT
11705 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11708 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11709 fnaddr = gen_rtx_REG (Pmode, 40);
11710 emit_move_insn (fnaddr, addr);
11711 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* Build the CALL rtx, wrap in SET when a value is returned, and add a
   parallel stack-pop when the callee pops its arguments.  */
11714 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11716 call = gen_rtx_SET (VOIDmode, retval, call);
11719 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11720 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11721 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11724 call = emit_call_insn (call);
11726 CALL_INSN_FUNCTION_USAGE (call) = use;
11730 /* Clear stack slot assignments remembered from previous functions.
11731 This is called from INIT_EXPANDERS once before RTL is emitted for each
   function; returns a freshly zeroed, garbage-collected
   machine_function record. */
11734 static struct machine_function *
11735 ix86_init_machine_status ()
11737 return ggc_alloc_cleared (sizeof (struct machine_function));
11740 /* Return a MEM corresponding to a stack slot with mode MODE.
11741 Allocate a new slot if necessary.
11743 The RTL for a function can have several slots available: N is
11744 which slot to use.  Slots are cached per (mode, n) pair on the
   ix86_stack_locals list so repeated requests reuse the same slot. */
11747 assign_386_stack_local (mode, n)
11748 enum machine_mode mode;
11751 struct stack_local_entry *s;
11753 if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Return the cached slot if this (mode, n) was allocated before.  */
11756 for (s = ix86_stack_locals; s; s = s->next)
11757 if (s->mode == mode && s->n == n)
11760 s = (struct stack_local_entry *)
11761 ggc_alloc (sizeof (struct stack_local_entry));
11764 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
/* Prepend the new entry to the per-function cache list.  */
11766 s->next = ix86_stack_locals;
11767 ix86_stack_locals = s;
11771 /* Construct the SYMBOL_REF for the tls_get_addr function.  The
   symbol is created lazily on first use and cached in a GC root. */
11773 static GTY(()) rtx ix86_tls_symbol;
11775 ix86_tls_get_addr ()
11778 if (!ix86_tls_symbol)
/* GNU TLS on 32-bit uses the ___tls_get_addr variant (extra
   underscore); everything else uses __tls_get_addr.  */
11780 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11781 (TARGET_GNU_TLS && !TARGET_64BIT)
11782 ? "___tls_get_addr"
11783 : "__tls_get_addr");
11786 return ix86_tls_symbol;
11789 /* Calculate the length of the memory address in the instruction
11790 encoding. Does not include the one-byte modrm, opcode, or prefix.
   NOTE(review): excerpt has gaps; the length accumulation/return
   statements are among the missing lines. */
11793 memory_address_length (addr)
11796 struct ix86_address parts;
11797 rtx base, index, disp;
/* Auto-inc/dec addressing encodes no extra address bytes.  */
11800 if (GET_CODE (addr) == PRE_DEC
11801 || GET_CODE (addr) == POST_INC
11802 || GET_CODE (addr) == PRE_MODIFY
11803 || GET_CODE (addr) == POST_MODIFY)
11806 if (! ix86_decompose_address (addr, &parts))
11810 index = parts.index;
11814 /* Register Indirect. */
11815 if (base && !index && !disp)
11817 /* Special cases: ebp and esp need the two-byte modrm form. */
11818 if (addr == stack_pointer_rtx
11819 || addr == arg_pointer_rtx
11820 || addr == frame_pointer_rtx
11821 || addr == hard_frame_pointer_rtx)
11825 /* Direct Addressing. */
11826 else if (disp && !base && !index)
11831 /* Find the length of the displacement constant. */
/* 'K' constraint = signed 8-bit immediate -> 1-byte displacement.  */
11834 if (GET_CODE (disp) == CONST_INT
11835 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11841 /* An index requires the two-byte modrm form. */
11849 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11850 is set, expect that insn have 8bit immediate alternative.
   Scans the insn's operands for a constant and returns the number of
   bytes its encoding needs according to the insn's mode attribute.
   NOTE(review): excerpt has gaps; the per-mode return values are
   among the missing lines. */
11852 ix86_attr_length_immediate_default (insn, shortform)
11858 extract_insn_cached (insn);
11859 for (i = recog_data.n_operands - 1; i >= 0; --i)
11860 if (CONSTANT_P (recog_data.operand[i]))
/* 'K' constraint = value fits in a signed 8-bit immediate.  */
11865 && GET_CODE (recog_data.operand[i]) == CONST_INT
11866 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11870 switch (get_attr_mode (insn))
11881 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11886 fatal_insn ("unknown insn mode", insn);
11892 /* Compute default value for "length_address" attribute: the encoded
   length of the first memory operand's address, or (presumably) zero
   when the insn has no memory operand -- the fallback return is among
   the lines missing from this excerpt. */
11894 ix86_attr_length_address_default (insn)
11898 extract_insn_cached (insn);
11899 for (i = recog_data.n_operands - 1; i >= 0; --i)
11900 if (GET_CODE (recog_data.operand[i]) == MEM)
11902 return memory_address_length (XEXP (recog_data.operand[i], 0));
11908 /* Return the maximum number of instructions a cpu can issue.
   NOTE(review): only a fragment of this function is visible -- the
   function header, switch statement and the per-case return values
   are among the lines missing from this excerpt. */
11915 case PROCESSOR_PENTIUM:
11919 case PROCESSOR_PENTIUMPRO:
11920 case PROCESSOR_PENTIUM4:
11921 case PROCESSOR_ATHLON:
11930 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11931 by DEP_INSN and nothing set by DEP_INSN.
   DEP_INSN may be a single SET (SET is its flags destination) or a
   two-SET PARALLEL, in which case SET/SET2 are the two destinations
   and both must be checked against INSN's uses. */
11934 ix86_flags_dependant (insn, dep_insn, insn_type)
11935 rtx insn, dep_insn;
11936 enum attr_type insn_type;
11940 /* Simplify the test for uninteresting insns. */
11941 if (insn_type != TYPE_SETCC
11942 && insn_type != TYPE_ICMOV
11943 && insn_type != TYPE_FCMOV
11944 && insn_type != TYPE_IBR)
11947 if ((set = single_set (dep_insn)) != 0)
11949 set = SET_DEST (set);
11952 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11953 && XVECLEN (PATTERN (dep_insn), 0) == 2
11954 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11955 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11957 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* Fixed: SET2 must be the destination of the SECOND SET (element 1);
   reading element 0 again merely duplicated SET and made the SET2
   overlap check below vacuous.  */
11958 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11963 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11966 /* This test is true if the dependent insn reads the flags but
11967 not any other potentially set register. */
11968 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11971 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11977 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11978 address with operands set by DEP_INSN. */
11981 ix86_agi_dependant (insn, dep_insn, insn_type)
11982 rtx insn, dep_insn;
11983 enum attr_type insn_type;
/* LEA carries its address as the SET_SRC of its pattern rather than
   inside a MEM, so dig it out specially.  */
11987 if (insn_type == TYPE_LEA
11990 addr = PATTERN (insn);
11991 if (GET_CODE (addr) == SET)
11993 else if (GET_CODE (addr) == PARALLEL
11994 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11995 addr = XVECEXP (addr, 0, 0);
11998 addr = SET_SRC (addr);
/* Otherwise use the address of the first MEM operand found, scanning
   operands from last to first (the no-MEM early return is elided).  */
12003 extract_insn_cached (insn);
12004 for (i = recog_data.n_operands - 1; i >= 0; --i)
12005 if (GET_CODE (recog_data.operand[i]) == MEM)
12007 addr = XEXP (recog_data.operand[i], 0);
/* An AGI stall exists iff DEP_INSN modifies something ADDR reads.  */
12014 return modified_in_p (addr, dep_insn);
/* TARGET_SCHED_ADJUST_COST worker: INSN depends on DEP_INSN through
   dependence note LINK; COST is the default latency.  Return the latency
   adjusted for the scheduled processor.  NOTE(review): elided listing --
   the switch header, several cost assignments, braces and the final
   return are on omitted lines.  */
12018 ix86_adjust_cost (insn, link, dep_insn, cost)
12019 rtx insn, link, dep_insn;
12022 enum attr_type insn_type, dep_insn_type;
12023 enum attr_memory memory, dep_memory;
12025 int dep_insn_code_number;
12027 /* Anti and output dependencies have zero cost on all CPUs. */
12028 if (REG_NOTE_KIND (link) != 0)
12031 dep_insn_code_number = recog_memoized (dep_insn);
12033 /* If we can't recognize the insns, we can't really do anything. */
12034 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12037 insn_type = get_attr_type (insn);
12038 dep_insn_type = get_attr_type (dep_insn);
12042 case PROCESSOR_PENTIUM:
12043 /* Address Generation Interlock adds a cycle of latency. */
12044 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12047 /* ??? Compares pair with jump/setcc. */
12048 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12051 /* Floating point stores require value to be ready one cycle earlier. */
12052 if (insn_type == TYPE_FMOV
12053 && get_attr_memory (insn) == MEMORY_STORE
12054 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12058 case PROCESSOR_PENTIUMPRO:
12059 memory = get_attr_memory (insn);
12060 dep_memory = get_attr_memory (dep_insn);
12062 /* Since we can't represent delayed latencies of load+operation,
12063 increase the cost here for non-imov insns. */
12064 if (dep_insn_type != TYPE_IMOV
12065 && dep_insn_type != TYPE_FMOV
12066 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12069 /* INT->FP conversion is expensive. */
12070 if (get_attr_fp_int_src (dep_insn))
12073 /* There is one cycle extra latency between an FP op and a store. */
12074 if (insn_type == TYPE_FMOV
12075 && (set = single_set (dep_insn)) != NULL_RTX
12076 && (set2 = single_set (insn)) != NULL_RTX
12077 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12078 && GET_CODE (SET_DEST (set2)) == MEM)
12081 /* Show ability of reorder buffer to hide latency of load by executing
12082 in parallel with previous instruction in case
12083 previous instruction is not needed to compute the address. */
12084 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12085 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12087 /* Claim moves to take one cycle, as core can issue one load
12088 at time and the next load can start cycle later. */
12089 if (dep_insn_type == TYPE_IMOV
12090 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): the case label for the processor handled next (K6,
   judging by the surviving comments) is on an elided line.  */
12098 memory = get_attr_memory (insn);
12099 dep_memory = get_attr_memory (dep_insn);
12100 /* The esp dependency is resolved before the instruction is really
12102 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12103 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12106 /* Since we can't represent delayed latencies of load+operation,
12107 increase the cost here for non-imov insns. */
12108 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12109 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12111 /* INT->FP conversion is expensive. */
12112 if (get_attr_fp_int_src (dep_insn))
12115 /* Show ability of reorder buffer to hide latency of load by executing
12116 in parallel with previous instruction in case
12117 previous instruction is not needed to compute the address. */
12118 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12119 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12121 /* Claim moves to take one cycle, as core can issue one load
12122 at time and the next load can start cycle later. */
12123 if (dep_insn_type == TYPE_IMOV
12124 || dep_insn_type == TYPE_FMOV)
12133 case PROCESSOR_ATHLON:
12135 memory = get_attr_memory (insn);
12136 dep_memory = get_attr_memory (dep_insn);
12138 /* Show ability of reorder buffer to hide latency of load by executing
12139 in parallel with previous instruction in case
12140 previous instruction is not needed to compute the address. */
12141 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12142 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12144 /* Claim moves to take one cycle, as core can issue one load
12145 at time and the next load can start cycle later. */
12146 if (dep_insn_type == TYPE_IMOV
12147 || dep_insn_type == TYPE_FMOV)
12149 else if (cost >= 3)
12164 struct ppro_sched_data
12167 int issued_this_cycle;
/* Return the ppro_uops attribute of INSN; when the insn cannot be
   recognized, conservatively report PPRO_UOPS_MANY (worst decode cost).  */
12171 static enum attr_ppro_uops
12172 ix86_safe_ppro_uops (insn)
12175 if (recog_memoized (insn) >= 0)
12176 return get_attr_ppro_uops (insn);
12178 return PPRO_UOPS_MANY;
/* Print to DUMP the insn UIDs currently occupying the three PPro decoder
   slots, one line per packet; silent when decoder slot 0 is empty.  */
12182 ix86_dump_ppro_packet (dump)
12185 if (ix86_sched_data.ppro.decode[0])
12187 fprintf (dump, "PPRO packet: %d",
12188 INSN_UID (ix86_sched_data.ppro.decode[0]))
12189 if (ix86_sched_data.ppro.decode[1])
12190 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12191 if (ix86_sched_data.ppro.decode[2])
12192 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12193 fputc ('\n', dump);
12197 /* We're beginning a new block. Initialize data structures as necessary. */
/* TARGET_SCHED_INIT hook; all three parameters are unused -- the state
   lives in the global ix86_sched_data, which is simply zeroed.  */
12200 ix86_sched_init (dump, sched_verbose, veclen)
12201 FILE *dump ATTRIBUTE_UNUSED;
12202 int sched_verbose ATTRIBUTE_UNUSED;
12203 int veclen ATTRIBUTE_UNUSED;
12205 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12208 /* Shift INSN to SLOT, and shift everything else down. */
/* NOTE(review): elided listing -- the INSNP != SLOT guard, the temporary
   holding *INSNP and the final store into *SLOT are on omitted lines;
   only the element-shifting loop survives here.  */
12211 ix86_reorder_insn (insnp, slot)
12218 insnp[0] = insnp[1];
12219 while (++insnp != slot);
/* Reorder the ready queue (READY .. E_READY, E_READY being the head) to
   fill a PPro decode packet: one complex/4-uop insn in slot 0 followed by
   up to two single-uop insns.  Records the issue count in
   ix86_sched_data.  NOTE(review): elided listing -- declarations, braces
   and several break/goto lines are omitted.  */
12225 ix86_sched_reorder_ppro (ready, e_ready)
12230 enum attr_ppro_uops cur_uops;
12231 int issued_this_cycle;
12235 /* At this point .ppro.decode contains the state of the three
12236 decoders from last "cycle". That is, those insns that were
12237 actually independent. But here we're scheduling for the
12238 decoder, and we may find things that are decodable in the
12241 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12242 issued_this_cycle = 0;
12245 cur_uops = ix86_safe_ppro_uops (*insnp);
12247 /* If the decoders are empty, and we've a complex insn at the
12248 head of the priority queue, let it issue without complaint. */
12249 if (decode[0] == NULL)
12251 if (cur_uops == PPRO_UOPS_MANY)
12253 decode[0] = *insnp;
12257 /* Otherwise, search for a 2-4 uop unsn to issue. */
12258 while (cur_uops != PPRO_UOPS_FEW)
12260 if (insnp == ready)
12262 cur_uops = ix86_safe_ppro_uops (*--insnp);
12265 /* If so, move it to the head of the line. */
12266 if (cur_uops == PPRO_UOPS_FEW)
12267 ix86_reorder_insn (insnp, e_ready);
12269 /* Issue the head of the queue. */
12270 issued_this_cycle = 1;
12271 decode[0] = *e_ready--;
12274 /* Look for simple insns to fill in the other two slots. */
12275 for (i = 1; i < 3; ++i)
12276 if (decode[i] == NULL)
12278 if (ready > e_ready)
12282 cur_uops = ix86_safe_ppro_uops (*insnp);
12283 while (cur_uops != PPRO_UOPS_ONE)
12285 if (insnp == ready)
12287 cur_uops = ix86_safe_ppro_uops (*--insnp);
12290 /* Found one. Move it to the head of the queue and issue it. */
12291 if (cur_uops == PPRO_UOPS_ONE)
12293 ix86_reorder_insn (insnp, e_ready);
12294 decode[i] = *e_ready--;
12295 issued_this_cycle++;
12299 /* ??? Didn't find one. Ideally, here we would do a lazy split
12300 of 2-uop insns, issue one and queue the other. */
/* Always report at least one issued insn so the cycle advances.  */
12304 if (issued_this_cycle == 0)
12305 issued_this_cycle = 1;
12306 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12309 /* We are about to being issuing insns for this clock cycle.
12310 Override the default sort algorithm to better slot instructions. */
/* TARGET_SCHED_REORDER hook.  Dispatches to the PPro-specific packer for
   PROCESSOR_PENTIUMPRO; otherwise leaves the queue alone (the elided
   default path).  Returns the CPU issue rate in all cases.  */
12312 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
12313 FILE *dump ATTRIBUTE_UNUSED;
12314 int sched_verbose ATTRIBUTE_UNUSED;
12317 int clock_var ATTRIBUTE_UNUSED;
12319 int n_ready = *n_readyp;
12320 rtx *e_ready = ready + n_ready - 1;
12322 /* Make sure to go ahead and initialize key items in
12323 ix86_sched_data if we are not going to bother trying to
12324 reorder the ready queue. */
12327 ix86_sched_data.ppro.issued_this_cycle = 1;
12336 case PROCESSOR_PENTIUMPRO:
12337 ix86_sched_reorder_ppro (ready, e_ready);
12342 return ix86_issue_rate ();
12345 /* We are about to issue INSN. Return the number of insns left on the
12346 ready queue that can be issued this cycle. */
/* TARGET_SCHED_VARIABLE_ISSUE hook.  For PPro, also advances the decode
   packet state in ix86_sched_data and dumps completed packets.
   NOTE(review): elided listing -- the switch header, default case and
   several braces are on omitted lines.  */
12349 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12353 int can_issue_more;
12359 return can_issue_more - 1;
12361 case PROCESSOR_PENTIUMPRO:
12363 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
/* A complex insn occupies decoder 0 alone: flush the previous packet
   and start a fresh one containing just INSN.  */
12365 if (uops == PPRO_UOPS_MANY)
12368 ix86_dump_ppro_packet (dump);
12369 ix86_sched_data.ppro.decode[0] = insn;
12370 ix86_sched_data.ppro.decode[1] = NULL;
12371 ix86_sched_data.ppro.decode[2] = NULL;
12373 ix86_dump_ppro_packet (dump);
12374 ix86_sched_data.ppro.decode[0] = NULL;
12376 else if (uops == PPRO_UOPS_FEW)
12379 ix86_dump_ppro_packet (dump);
12380 ix86_sched_data.ppro.decode[0] = insn;
12381 ix86_sched_data.ppro.decode[1] = NULL;
12382 ix86_sched_data.ppro.decode[2] = NULL;
/* Single-uop insn: drop it into the first free decoder slot; when the
   packet fills (elided branch), dump it and reset all three slots.  */
12386 for (i = 0; i < 3; ++i)
12387 if (ix86_sched_data.ppro.decode[i] == NULL)
12389 ix86_sched_data.ppro.decode[i] = insn;
12397 ix86_dump_ppro_packet (dump);
12398 ix86_sched_data.ppro.decode[0] = NULL;
12399 ix86_sched_data.ppro.decode[1] = NULL;
12400 ix86_sched_data.ppro.decode[2] = NULL;
12404 return --ix86_sched_data.ppro.issued_this_cycle;
/* TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook: nonzero when the DFA
   scheduler description should be used.  The return statements for the
   Pentium/Athlon-K8 branch and the fallback are on elided lines.  */
12409 ia32_use_dfa_pipeline_interface ()
12411 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12416 /* How many alternative schedules to try. This should be as wide as the
12417 scheduling freedom in the DFA, but no wider. Making this value too
12418 large results extra work for the scheduler. */
/* Return values for the Pentium branch and the default are elided.  */
12421 ia32_multipass_dfa_lookahead ()
12423 if (ix86_cpu == PROCESSOR_PENTIUM)
12430 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12431 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
/* Iterates the insn chain and delegates the per-pattern rewriting to
   ix86_set_move_mem_attrs_1 below.  */
12435 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12437 rtx dstref, srcref, dstreg, srcreg;
12441 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12443 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12447 /* Subroutine of above to actually do the updating by recursively walking
/* Recursively walks the rtx X; any MEM whose address is (pointer-equal
   to) DSTREG/SRCREG gets the memory attributes of DSTREF/SRCREF copied
   onto it.  Recurses through 'e' (rtx) and 'E' (rtx vector) fields.  */
12451 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12453 rtx dstref, srcref, dstreg, srcreg;
12455 enum rtx_code code = GET_CODE (x);
12456 const char *format_ptr = GET_RTX_FORMAT (code);
12459 if (code == MEM && XEXP (x, 0) == dstreg)
12460 MEM_COPY_ATTRIBUTES (x, dstref);
12461 else if (code == MEM && XEXP (x, 0) == srcreg)
12462 MEM_COPY_ATTRIBUTES (x, srcref);
12464 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12466 if (*format_ptr == 'e')
12467 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12469 else if (*format_ptr == 'E')
12470 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12471 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12476 /* Compute the alignment given to a constant that is being placed in memory.
12477 EXP is the constant and ALIGN is the alignment that the object would
12479 The value of this function is used instead of that alignment to align
/* Raises alignment to 64 bits for DFmode reals, 128 bits for modes the
   (elided) ALIGN_MODE_128 predicate accepts, and for longer string
   constants (the returned values are on elided lines).  */
12483 ix86_constant_alignment (exp, align)
12487 if (TREE_CODE (exp) == REAL_CST)
12489 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12491 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12494 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12501 /* Compute the alignment for a static variable.
12502 TYPE is the data type, and ALIGN is the alignment that
12503 the object would ordinarily have. The value of this function is used
12504 instead of that alignment to align the object. */
/* NOTE(review): the returned alignment values are on elided lines; only
   the type tests survive.  */
12507 ix86_data_alignment (type, align)
/* Large aggregates (>= 256 bits) get extra alignment to help
   block-move expansion.  */
12511 if (AGGREGATE_TYPE_P (type)
12512 && TYPE_SIZE (type)
12513 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12514 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12515 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12518 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12519 to 16byte boundary. */
12522 if (AGGREGATE_TYPE_P (type)
12523 && TYPE_SIZE (type)
12524 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12525 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12526 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* For arrays, complex types, records/unions and scalars, bump DFmode
   data to 64-bit and 128-bit-mode data to 128-bit alignment.  */
12530 if (TREE_CODE (type) == ARRAY_TYPE)
12532 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12534 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12537 else if (TREE_CODE (type) == COMPLEX_TYPE)
12540 if (TYPE_MODE (type) == DCmode && align < 64)
12542 if (TYPE_MODE (type) == XCmode && align < 128)
12545 else if ((TREE_CODE (type) == RECORD_TYPE
12546 || TREE_CODE (type) == UNION_TYPE
12547 || TREE_CODE (type) == QUAL_UNION_TYPE)
12548 && TYPE_FIELDS (type))
12550 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12552 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12555 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12556 || TREE_CODE (type) == INTEGER_TYPE)
12558 if (TYPE_MODE (type) == DFmode && align < 64)
12560 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12567 /* Compute the alignment for a local variable.
12568 TYPE is the data type, and ALIGN is the alignment that
12569 the object would ordinarily have. The value of this macro is used
12570 instead of that alignment to align the object. */
/* Same shape as ix86_data_alignment above but tuned for stack objects
   (smaller 16-byte threshold for the x86-64 array rule).  The returned
   alignment values are on elided lines.  */
12573 ix86_local_alignment (type, align)
12577 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12578 to 16byte boundary. */
12581 if (AGGREGATE_TYPE_P (type)
12582 && TYPE_SIZE (type)
12583 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12584 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12585 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12588 if (TREE_CODE (type) == ARRAY_TYPE)
12590 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12592 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12595 else if (TREE_CODE (type) == COMPLEX_TYPE)
12597 if (TYPE_MODE (type) == DCmode && align < 64)
12599 if (TYPE_MODE (type) == XCmode && align < 128)
12602 else if ((TREE_CODE (type) == RECORD_TYPE
12603 || TREE_CODE (type) == UNION_TYPE
12604 || TREE_CODE (type) == QUAL_UNION_TYPE)
12605 && TYPE_FIELDS (type))
12607 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12609 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12612 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12613 || TREE_CODE (type) == INTEGER_TYPE)
12616 if (TYPE_MODE (type) == DFmode && align < 64)
12618 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12624 /* Emit RTL insns to initialize the variable parts of a trampoline.
12625 FNADDR is an RTX for the address of the function's pure code.
12626 CXT is an RTX for the static chain value for the function. */
12628 x86_initialize_trampoline (tramp, fnaddr, cxt)
12629 rtx tramp, fnaddr, cxt;
/* 32-bit layout: 0xb9 = mov ecx,imm32 (static chain at bytes 1-4),
   0xe9 = jmp rel32 (displacement at bytes 6-9, relative to tramp+10,
   the address of the insn after the jmp).  The TARGET_64BIT selection
   is on elided lines.  */
12633 /* Compute offset from the end of the jmp to the target function. */
12634 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12635 plus_constant (tramp, 10),
12636 NULL_RTX, 1, OPTAB_DIRECT);
12637 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12638 gen_int_mode (0xb9, QImode));
12639 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12640 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12641 gen_int_mode (0xe9, QImode));
12642 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* 64-bit path: load FNADDR into r11 with the shortest usable mov.  */
12647 /* Try to load address using shorter movl instead of movabs.
12648 We may want to support movq for kernel mode, but kernel does not use
12649 trampolines at the moment. */
12650 if (x86_64_zero_extended_value (fnaddr))
12652 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12653 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12654 gen_int_mode (0xbb41, HImode));
12655 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12656 gen_lowpart (SImode, fnaddr));
12661 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12662 gen_int_mode (0xbb49, HImode));
12663 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12667 /* Load static chain using movabs to r10. */
12668 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12669 gen_int_mode (0xba49, HImode));
12670 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12673 /* Jump to the r11 */
12674 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12675 gen_int_mode (0xff49, HImode));
12676 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12677 gen_int_mode (0xe3, QImode));
/* Sanity check: the code written above must fit the declared size.  */
12679 if (offset > TRAMPOLINE_SIZE)
12683 #ifdef TRANSFER_FROM_TRAMPOLINE
12684 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12685 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12689 #define def_builtin(MASK, NAME, TYPE, CODE) \
12691 if ((MASK) & target_flags) \
12692 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12693 NULL, NULL_TREE); \
12696 struct builtin_description
12698 const unsigned int mask;
12699 const enum insn_code icode;
12700 const char *const name;
12701 const enum ix86_builtins code;
12702 const enum rtx_code comparison;
12703 const unsigned int flag;
12706 /* Used for builtins that are enabled both by -msse and -msse2. */
12707 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12709 static const struct builtin_description bdesc_comi[] =
12711 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12712 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12713 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12714 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12715 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12716 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12717 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12718 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12719 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12720 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12721 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12722 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12723 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12724 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12725 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12726 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12727 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12728 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12729 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12730 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12731 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12732 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12733 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12734 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12737 static const struct builtin_description bdesc_2arg[] =
12740 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12741 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12742 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12743 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12744 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12745 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12746 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12747 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12749 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12750 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12751 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12752 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12753 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12754 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12755 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12756 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12757 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12758 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12759 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12760 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12761 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12762 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12763 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12764 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12765 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12766 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12767 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12768 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12770 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12771 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12772 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12773 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12775 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12776 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12777 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12778 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12780 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12781 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12782 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12783 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12784 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12787 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12788 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12789 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12790 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12791 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12792 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12794 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12795 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12796 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12797 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12798 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12799 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12800 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12801 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12803 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12804 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12805 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12807 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12808 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12809 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12810 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12812 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12813 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12815 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12816 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12817 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12818 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12819 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12820 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12822 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12823 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12824 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12825 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12827 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12828 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12829 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12830 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12831 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12832 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12835 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12836 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12837 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12839 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12840 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12842 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12843 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12844 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12845 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12846 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12847 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12849 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12850 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12851 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12852 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12853 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12854 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12856 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12857 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12858 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12859 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12861 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12862 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12865 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12868 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12869 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12870 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12874 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12875 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12876 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12877 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12878 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12879 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12880 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12881 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12882 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12883 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12884 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12885 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12886 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12887 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12888 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12889 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12890 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12891 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12892 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12893 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12895 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12901 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12903 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12910 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12912 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12913   { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, /* paddq is a V2DI (64-bit element) add; the V4SI pattern would drop carries across 32-bit lanes */
12914 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12915 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12917   { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, /* psubq is a V2DI (64-bit element) subtract; the V4SI pattern would drop borrows across 32-bit lanes */
12919   /* These are 128-bit SSE2 instructions; they must be gated on MASK_SSE2
	   (not MASK_MMX) like every other *128 builtin in this table.  */
	{ MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12920   { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12921   { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12922   { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12923   { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12924   { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12925   { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12926   { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12928 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12929 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12930 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12931 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12934 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12935 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12936 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12938 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12939 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12958 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12962 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12963 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12964 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12967 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12971 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12980 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12995 /* Builtins taking a single vector argument (movmsk, sqrt/rcp/rsqrt,
	 the cvt* conversions, and the SSE2 mov* forms); each entry supplies
	 the enabling -m mask, insn code and builtin code.  */
      static const struct builtin_description bdesc_1arg[] =
12997 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12998 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13000 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13001 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13002 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13004 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13005 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13006 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13007 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13010 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13011 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13028 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
13038 ix86_init_builtins ()
13041 ix86_init_mmx_sse_builtins ();
13044 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13045 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13048 ix86_init_mmx_sse_builtins ()
13050 const struct builtin_description * d;
13053 tree pchar_type_node = build_pointer_type (char_type_node);
13054 tree pcchar_type_node = build_pointer_type (
13055 build_type_variant (char_type_node, 1, 0));
13056 tree pfloat_type_node = build_pointer_type (float_type_node);
13057 tree pcfloat_type_node = build_pointer_type (
13058 build_type_variant (float_type_node, 1, 0));
13059 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13060 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13061 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13064 tree int_ftype_v4sf_v4sf
13065 = build_function_type_list (integer_type_node,
13066 V4SF_type_node, V4SF_type_node, NULL_TREE);
13067 tree v4si_ftype_v4sf_v4sf
13068 = build_function_type_list (V4SI_type_node,
13069 V4SF_type_node, V4SF_type_node, NULL_TREE);
13070 /* MMX/SSE/integer conversions. */
13071 tree int_ftype_v4sf
13072 = build_function_type_list (integer_type_node,
13073 V4SF_type_node, NULL_TREE);
13074 tree int_ftype_v8qi
13075 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13076 tree v4sf_ftype_v4sf_int
13077 = build_function_type_list (V4SF_type_node,
13078 V4SF_type_node, integer_type_node, NULL_TREE);
13079 tree v4sf_ftype_v4sf_v2si
13080 = build_function_type_list (V4SF_type_node,
13081 V4SF_type_node, V2SI_type_node, NULL_TREE);
13082 tree int_ftype_v4hi_int
13083 = build_function_type_list (integer_type_node,
13084 V4HI_type_node, integer_type_node, NULL_TREE);
13085 tree v4hi_ftype_v4hi_int_int
13086 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13087 integer_type_node, integer_type_node,
13089 /* Miscellaneous. */
13090 tree v8qi_ftype_v4hi_v4hi
13091 = build_function_type_list (V8QI_type_node,
13092 V4HI_type_node, V4HI_type_node, NULL_TREE);
13093 tree v4hi_ftype_v2si_v2si
13094 = build_function_type_list (V4HI_type_node,
13095 V2SI_type_node, V2SI_type_node, NULL_TREE);
13096 tree v4sf_ftype_v4sf_v4sf_int
13097 = build_function_type_list (V4SF_type_node,
13098 V4SF_type_node, V4SF_type_node,
13099 integer_type_node, NULL_TREE);
13100 tree v2si_ftype_v4hi_v4hi
13101 = build_function_type_list (V2SI_type_node,
13102 V4HI_type_node, V4HI_type_node, NULL_TREE);
13103 tree v4hi_ftype_v4hi_int
13104 = build_function_type_list (V4HI_type_node,
13105 V4HI_type_node, integer_type_node, NULL_TREE);
13106 tree v4hi_ftype_v4hi_di
13107 = build_function_type_list (V4HI_type_node,
13108 V4HI_type_node, long_long_unsigned_type_node,
13110 tree v2si_ftype_v2si_di
13111 = build_function_type_list (V2SI_type_node,
13112 V2SI_type_node, long_long_unsigned_type_node,
13114 tree void_ftype_void
13115 = build_function_type (void_type_node, void_list_node);
13116 tree void_ftype_unsigned
13117 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13118 tree unsigned_ftype_void
13119 = build_function_type (unsigned_type_node, void_list_node);
13121 = build_function_type (long_long_unsigned_type_node, void_list_node);
13122 tree v4sf_ftype_void
13123 = build_function_type (V4SF_type_node, void_list_node);
13124 tree v2si_ftype_v4sf
13125 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13126 /* Loads/stores. */
13127 tree void_ftype_v8qi_v8qi_pchar
13128 = build_function_type_list (void_type_node,
13129 V8QI_type_node, V8QI_type_node,
13130 pchar_type_node, NULL_TREE);
13131 tree v4sf_ftype_pcfloat
13132 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13133 /* @@@ the type is bogus */
13134 tree v4sf_ftype_v4sf_pv2si
13135 = build_function_type_list (V4SF_type_node,
13136 V4SF_type_node, pv2si_type_node, NULL_TREE);
13137 tree void_ftype_pv2si_v4sf
13138 = build_function_type_list (void_type_node,
13139 pv2si_type_node, V4SF_type_node, NULL_TREE);
13140 tree void_ftype_pfloat_v4sf
13141 = build_function_type_list (void_type_node,
13142 pfloat_type_node, V4SF_type_node, NULL_TREE);
13143 tree void_ftype_pdi_di
13144 = build_function_type_list (void_type_node,
13145 pdi_type_node, long_long_unsigned_type_node,
13147 tree void_ftype_pv2di_v2di
13148 = build_function_type_list (void_type_node,
13149 pv2di_type_node, V2DI_type_node, NULL_TREE);
13150 /* Normal vector unops. */
13151 tree v4sf_ftype_v4sf
13152 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13154 /* Normal vector binops. */
13155 tree v4sf_ftype_v4sf_v4sf
13156 = build_function_type_list (V4SF_type_node,
13157 V4SF_type_node, V4SF_type_node, NULL_TREE);
13158 tree v8qi_ftype_v8qi_v8qi
13159 = build_function_type_list (V8QI_type_node,
13160 V8QI_type_node, V8QI_type_node, NULL_TREE);
13161 tree v4hi_ftype_v4hi_v4hi
13162 = build_function_type_list (V4HI_type_node,
13163 V4HI_type_node, V4HI_type_node, NULL_TREE);
13164 tree v2si_ftype_v2si_v2si
13165 = build_function_type_list (V2SI_type_node,
13166 V2SI_type_node, V2SI_type_node, NULL_TREE);
13167 tree di_ftype_di_di
13168 = build_function_type_list (long_long_unsigned_type_node,
13169 long_long_unsigned_type_node,
13170 long_long_unsigned_type_node, NULL_TREE);
13172 tree v2si_ftype_v2sf
13173 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13174 tree v2sf_ftype_v2si
13175 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13176 tree v2si_ftype_v2si
13177 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13178 tree v2sf_ftype_v2sf
13179 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13180 tree v2sf_ftype_v2sf_v2sf
13181 = build_function_type_list (V2SF_type_node,
13182 V2SF_type_node, V2SF_type_node, NULL_TREE);
13183 tree v2si_ftype_v2sf_v2sf
13184 = build_function_type_list (V2SI_type_node,
13185 V2SF_type_node, V2SF_type_node, NULL_TREE);
13186 tree pint_type_node = build_pointer_type (integer_type_node);
13187 tree pcint_type_node = build_pointer_type (
13188 build_type_variant (integer_type_node, 1, 0));
13189 tree pdouble_type_node = build_pointer_type (double_type_node);
13190 tree pcdouble_type_node = build_pointer_type (
13191 build_type_variant (double_type_node, 1, 0));
13192 tree int_ftype_v2df_v2df
13193 = build_function_type_list (integer_type_node,
13194 V2DF_type_node, V2DF_type_node, NULL_TREE);
13197 = build_function_type (intTI_type_node, void_list_node);
13198 tree v2di_ftype_void
13199 = build_function_type (V2DI_type_node, void_list_node);
13200 tree ti_ftype_ti_ti
13201 = build_function_type_list (intTI_type_node,
13202 intTI_type_node, intTI_type_node, NULL_TREE);
13203 tree void_ftype_pcvoid
13204 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13206 = build_function_type_list (V2DI_type_node,
13207 long_long_unsigned_type_node, NULL_TREE);
13209 = build_function_type_list (long_long_unsigned_type_node,
13210 V2DI_type_node, NULL_TREE);
13211 tree v4sf_ftype_v4si
13212 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13213 tree v4si_ftype_v4sf
13214 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13215 tree v2df_ftype_v4si
13216 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13217 tree v4si_ftype_v2df
13218 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13219 tree v2si_ftype_v2df
13220 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13221 tree v4sf_ftype_v2df
13222 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13223 tree v2df_ftype_v2si
13224 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13225 tree v2df_ftype_v4sf
13226 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13227 tree int_ftype_v2df
13228 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13229 tree v2df_ftype_v2df_int
13230 = build_function_type_list (V2DF_type_node,
13231 V2DF_type_node, integer_type_node, NULL_TREE);
13232 tree v4sf_ftype_v4sf_v2df
13233 = build_function_type_list (V4SF_type_node,
13234 V4SF_type_node, V2DF_type_node, NULL_TREE);
13235 tree v2df_ftype_v2df_v4sf
13236 = build_function_type_list (V2DF_type_node,
13237 V2DF_type_node, V4SF_type_node, NULL_TREE);
13238 tree v2df_ftype_v2df_v2df_int
13239 = build_function_type_list (V2DF_type_node,
13240 V2DF_type_node, V2DF_type_node,
13243 tree v2df_ftype_v2df_pv2si
13244 = build_function_type_list (V2DF_type_node,
13245 V2DF_type_node, pv2si_type_node, NULL_TREE);
13246 tree void_ftype_pv2si_v2df
13247 = build_function_type_list (void_type_node,
13248 pv2si_type_node, V2DF_type_node, NULL_TREE);
13249 tree void_ftype_pdouble_v2df
13250 = build_function_type_list (void_type_node,
13251 pdouble_type_node, V2DF_type_node, NULL_TREE);
13252 tree void_ftype_pint_int
13253 = build_function_type_list (void_type_node,
13254 pint_type_node, integer_type_node, NULL_TREE);
13255 tree void_ftype_v16qi_v16qi_pchar
13256 = build_function_type_list (void_type_node,
13257 V16QI_type_node, V16QI_type_node,
13258 pchar_type_node, NULL_TREE);
13259 tree v2df_ftype_pcdouble
13260 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13261 tree v2df_ftype_v2df_v2df
13262 = build_function_type_list (V2DF_type_node,
13263 V2DF_type_node, V2DF_type_node, NULL_TREE);
13264 tree v16qi_ftype_v16qi_v16qi
13265 = build_function_type_list (V16QI_type_node,
13266 V16QI_type_node, V16QI_type_node, NULL_TREE);
13267 tree v8hi_ftype_v8hi_v8hi
13268 = build_function_type_list (V8HI_type_node,
13269 V8HI_type_node, V8HI_type_node, NULL_TREE);
13270 tree v4si_ftype_v4si_v4si
13271 = build_function_type_list (V4SI_type_node,
13272 V4SI_type_node, V4SI_type_node, NULL_TREE);
13273 tree v2di_ftype_v2di_v2di
13274 = build_function_type_list (V2DI_type_node,
13275 V2DI_type_node, V2DI_type_node, NULL_TREE);
13276 tree v2di_ftype_v2df_v2df
13277 = build_function_type_list (V2DI_type_node,
13278 V2DF_type_node, V2DF_type_node, NULL_TREE);
13279 tree v2df_ftype_v2df
13280 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13281 tree v2df_ftype_double
13282 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13283 tree v2df_ftype_double_double
13284 = build_function_type_list (V2DF_type_node,
13285 double_type_node, double_type_node, NULL_TREE);
13286 tree int_ftype_v8hi_int
13287 = build_function_type_list (integer_type_node,
13288 V8HI_type_node, integer_type_node, NULL_TREE);
13289 tree v8hi_ftype_v8hi_int_int
13290 = build_function_type_list (V8HI_type_node,
13291 V8HI_type_node, integer_type_node,
13292 integer_type_node, NULL_TREE);
13293 tree v2di_ftype_v2di_int
13294 = build_function_type_list (V2DI_type_node,
13295 V2DI_type_node, integer_type_node, NULL_TREE);
13296 tree v4si_ftype_v4si_int
13297 = build_function_type_list (V4SI_type_node,
13298 V4SI_type_node, integer_type_node, NULL_TREE);
13299 tree v8hi_ftype_v8hi_int
13300 = build_function_type_list (V8HI_type_node,
13301 V8HI_type_node, integer_type_node, NULL_TREE);
13302 tree v8hi_ftype_v8hi_v2di
13303 = build_function_type_list (V8HI_type_node,
13304 V8HI_type_node, V2DI_type_node, NULL_TREE);
13305 tree v4si_ftype_v4si_v2di
13306 = build_function_type_list (V4SI_type_node,
13307 V4SI_type_node, V2DI_type_node, NULL_TREE);
13308 tree v4si_ftype_v8hi_v8hi
13309 = build_function_type_list (V4SI_type_node,
13310 V8HI_type_node, V8HI_type_node, NULL_TREE);
13311 tree di_ftype_v8qi_v8qi
13312 = build_function_type_list (long_long_unsigned_type_node,
13313 V8QI_type_node, V8QI_type_node, NULL_TREE);
13314 tree v2di_ftype_v16qi_v16qi
13315 = build_function_type_list (V2DI_type_node,
13316 V16QI_type_node, V16QI_type_node, NULL_TREE);
13317 tree int_ftype_v16qi
13318 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13319 tree v16qi_ftype_pcchar
13320 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13321 tree void_ftype_pchar_v16qi
13322 = build_function_type_list (void_type_node,
13323 pchar_type_node, V16QI_type_node, NULL_TREE);
13324 tree v4si_ftype_pcint
13325 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13326 tree void_ftype_pcint_v4si
13327 = build_function_type_list (void_type_node,
13328 pcint_type_node, V4SI_type_node, NULL_TREE);
13329 tree v2di_ftype_v2di
13330 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13332 /* Add all builtins that are more or less simple operations on two
13334 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13336 /* Use one of the operands; the target can have a different mode for
13337 mask-generating compares. */
13338 enum machine_mode mode;
13343 mode = insn_data[d->icode].operand[1].mode;
13348 type = v16qi_ftype_v16qi_v16qi;
13351 type = v8hi_ftype_v8hi_v8hi;
13354 type = v4si_ftype_v4si_v4si;
13357 type = v2di_ftype_v2di_v2di;
13360 type = v2df_ftype_v2df_v2df;
13363 type = ti_ftype_ti_ti;
13366 type = v4sf_ftype_v4sf_v4sf;
13369 type = v8qi_ftype_v8qi_v8qi;
13372 type = v4hi_ftype_v4hi_v4hi;
13375 type = v2si_ftype_v2si_v2si;
13378 type = di_ftype_di_di;
13385 /* Override for comparisons. */
13386 if (d->icode == CODE_FOR_maskcmpv4sf3
13387 || d->icode == CODE_FOR_maskncmpv4sf3
13388 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13389 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13390 type = v4si_ftype_v4sf_v4sf;
13392 if (d->icode == CODE_FOR_maskcmpv2df3
13393 || d->icode == CODE_FOR_maskncmpv2df3
13394 || d->icode == CODE_FOR_vmmaskcmpv2df3
13395 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13396 type = v2di_ftype_v2df_v2df;
13398 def_builtin (d->mask, d->name, type, d->code);
13401 /* Add the remaining MMX insns with somewhat more complicated types. */
13402 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13403 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13404 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13405 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13406 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13408 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13409 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13410 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13412 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13413 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13415 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13416 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13418 /* comi/ucomi insns. */
13419 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13420 if (d->mask == MASK_SSE2)
13421 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13423 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13425 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13426 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13427 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13429 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13430 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13431 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13432 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13433 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13434 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13435 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13436 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13438 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13439 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13441 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13443 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13444 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13445 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13446 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13447 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13448 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13450 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13451 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13452 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13453 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13455 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13456 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13457 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13458 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13460 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13462 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13464 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13465 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13466 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13467 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13468 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13469 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13471 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13473 /* Original 3DNow! */
13474 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13475 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13476 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13477 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13478 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13479 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13480 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13481 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13482 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13483 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13484 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13485 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13486 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13487 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13488 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13489 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13490 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13491 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13492 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13493 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13495 /* 3DNow! extension as used in the Athlon CPU. */
13496 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13497 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13498 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13499 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13500 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13501 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13503 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13506 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13507 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13509 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13510 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13511 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13513 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13514 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13515 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13516 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13517 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13518 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13520 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13521 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13522 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13523 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13525 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13526 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13527 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13528 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13529 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13531 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13532 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13536 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13537 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13539 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13541 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13542 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13546 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13547 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13548 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13552 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13553 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13557 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13559 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13561 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13563 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13565 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13566 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13567 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13568 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13571 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13572 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13573 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13577 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13578 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13581 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13583 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13586 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13587 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13589 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13590 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13591 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13597 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13598 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13599 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13601 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13603 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13604 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13609 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13612 /* Errors in the source file can cause expand_expr to return const0_rtx
13613 where we expect a vector. To avoid crashing, use one of the vector
13614 clear instructions. */
/* NOTE(review): this listing is line-sampled; the return-type line
   (presumably "static rtx"), the parameter decl for X, the braces and
   the trailing "return x;" are not visible here -- confirm against the
   full source before editing.  */
13616 safe_vector_operand (x, mode)
13618 enum machine_mode mode;
/* A non-const0_rtx X is already usable as a vector operand -- the
   early-out path (presumably "return x;") is missing from view.  */
13620 if (x != const0_rtx)
/* Otherwise materialize a fresh register of MODE and clear it.  */
13622 x = gen_reg_rtx (mode);
/* MMX/3DNow! modes: clear via the DImode view of the register
   (pxor-style clear emitted by gen_mmx_clrdi).  */
13624 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13625 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13626 : gen_rtx_SUBREG (DImode, x, 0)));
/* SSE modes: clear via the V4SFmode view (xorps-style clear).  The
   "else" joining this to the MMX branch is missing from view.  */
13628 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13629 : gen_rtx_SUBREG (V4SFmode, x, 0),
13630 CONST0_RTX (V4SFmode)));
13634 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin: evaluate both arguments, coerce them
   into the operand modes ICODE requires, and emit the insn with the
   result in TARGET (or a fresh register).
   NOTE(review): line-sampled listing -- the decls for ARGLIST/TARGET,
   the opening brace, the "rtx pat;" decl, the "if (optimize || !target"
   head of the target check, and the trailing emit/return lines are
   missing from view.  */
13637 ix86_expand_binop_builtin (icode, arglist, target)
13638 enum insn_code icode;
13643 tree arg0 = TREE_VALUE (arglist);
13644 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13645 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13646 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13647 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13648 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13649 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* const0_rtx from an erroneous source expression must not reach a
   vector operand slot; replace it with a cleared vector register.  */
13651 if (VECTOR_MODE_P (mode0))
13652 op0 = safe_vector_operand (op0, mode0);
13653 if (VECTOR_MODE_P (mode1))
13654 op1 = safe_vector_operand (op1, mode1);
/* Make sure TARGET is a register of the result mode that the insn's
   output predicate accepts (condition head missing from view).  */
13657 || GET_MODE (target) != tmode
13658 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13659 target = gen_reg_rtx (tmode);
/* Special case: an SImode operand feeding a TImode slot is widened by
   loading it into a V4SI register and taking the TImode low part.  */
13661 if (GET_MODE (op1) == SImode && mode1 == TImode)
13663 rtx x = gen_reg_rtx (V4SImode);
13664 emit_insn (gen_sse2_loadd (x, op1));
13665 op1 = gen_lowpart (TImode, x);
13668 /* In case the insn wants input operands in modes different from
13669 the result, abort. */
13670 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
/* Force each operand into a register when it fails its predicate.  */
13673 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13674 op0 = copy_to_mode_reg (mode0, op0)
13675 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13676 op1 = copy_to_mode_reg (mode1, op1);
13678 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13679 yet one of the two must not be a memory. This is normally enforced
13680 by expanders, but we didn't bother to create one here. */
13681 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13682 op0 = copy_to_mode_reg (mode0, op0);
/* Generate the insn; the emit/return tail is missing from view.  */
13684 pat = GEN_FCN (icode) (target, op0, op1);
13691 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Expand a store builtin: arg0 is the destination address, arg1 the
   value.  Wrap the address in a MEM of the insn's destination mode and
   force the value into a register of the source mode.
   NOTE(review): line-sampled listing -- the ARGLIST decl, braces,
   "rtx pat;" and the emit/return tail are missing from view.  */
13694 ix86_expand_store_builtin (icode, arglist)
13695 enum insn_code icode;
13699 tree arg0 = TREE_VALUE (arglist);
13700 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13701 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13702 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13703 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13704 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* Guard against const0_rtx reaching the vector value operand.  */
13706 if (VECTOR_MODE_P (mode1))
13707 op1 = safe_vector_operand (op1, mode1);
/* Build the memory destination from the pointer argument.  */
13709 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13710 op1 = copy_to_mode_reg (mode1, op1);
13712 pat = GEN_FCN (icode) (op0, op1);
13718 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expand a one-operand builtin.  When DO_LOAD is nonzero the argument
   is a pointer and is dereferenced through a MEM; otherwise it is a
   value coerced to the insn's input mode.
   NOTE(review): line-sampled listing -- decls for ARGLIST/TARGET/
   DO_LOAD, braces, "rtx pat;", the "if (optimize || !target" condition
   head, the "else" pairing the load/no-load paths, and the emit/return
   tail are missing from view.  */
13721 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13722 enum insn_code icode;
13728 tree arg0 = TREE_VALUE (arglist);
13729 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13730 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13731 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Ensure TARGET is an acceptable output register.  */
13734 || GET_MODE (target) != tmode
13735 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13736 target = gen_reg_rtx (tmode);
/* DO_LOAD path: treat op0 as an address and read through it.  */
13738 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* Value path: sanitize vector operands and satisfy the predicate.  */
13741 if (VECTOR_MODE_P (mode0))
13742 op0 = safe_vector_operand (op0, mode0);
13744 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13745 op0 = copy_to_mode_reg (mode0, op0);
13748 pat = GEN_FCN (icode) (target, op0);
13755 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13756 sqrtss, rsqrtss, rcpss. */
/* Expand sqrtss/rsqrtss/rcpss: these insns take the source twice (the
   scalar op is applied to the low element, the upper elements pass
   through), so the single argument is fed to both input operands.
   NOTE(review): line-sampled listing -- decls for ARGLIST/TARGET,
   braces, "rtx pat;", the "if (optimize || !target" condition head,
   the line assigning OP1 (presumably "op1 = op0;"), and the
   emit/return tail are missing from view.  */
13759 ix86_expand_unop1_builtin (icode, arglist, target)
13760 enum insn_code icode;
13765 tree arg0 = TREE_VALUE (arglist);
13766 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13767 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13768 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Ensure TARGET is an acceptable output register.  */
13771 || GET_MODE (target) != tmode
13772 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13773 target = gen_reg_rtx (tmode);
/* Sanitize a const0_rtx vector operand.  */
13775 if (VECTOR_MODE_P (mode0))
13776 op0 = safe_vector_operand (op0, mode0);
13778 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13779 op0 = copy_to_mode_reg (mode0, op0);
/* OP1 duplicates OP0 (assignment line missing from view); both input
   operands share MODE0.  */
13782 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13783 op1 = copy_to_mode_reg (mode0, op1);
13785 pat = GEN_FCN (icode) (target, op0, op1);
13792 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE compare builtin described by D: emit the compare insn
   with the comparison code as an extra RTX operand.
   NOTE(review): line-sampled listing -- decls for ARGLIST/TARGET,
   braces, "rtx op2, pat;", the swap condition head (presumably testing
   d->flag / swap-operands), the rest of the swap body, the
   "if (optimize || !target" condition head, and the emit/return tail
   are missing from view.  */
13795 ix86_expand_sse_compare (d, arglist, target)
13796 const struct builtin_description *d;
13801 tree arg0 = TREE_VALUE (arglist);
13802 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13803 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13804 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13806 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13807 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13808 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13809 enum rtx_code comparison = d->comparison;
/* Guard against const0_rtx reaching a vector operand.  */
13811 if (VECTOR_MODE_P (mode0))
13812 op0 = safe_vector_operand (op0, mode0);
13813 if (VECTOR_MODE_P (mode1))
13814 op1 = safe_vector_operand (op1, mode1);
13816 /* Swap operands if we have a comparison that isn't available in
/* (continuation of the swap comment and its condition are missing from
   view; the body copies OP1 into a scratch so the operands can be
   exchanged.)  */
13820 rtx tmp = gen_reg_rtx (mode1);
13821 emit_move_insn (tmp, op1);
/* Ensure TARGET is an acceptable output register.  */
13827 || GET_MODE (target) != tmode
13828 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13829 target = gen_reg_rtx (tmode);
13831 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13832 op0 = copy_to_mode_reg (mode0, op0);
13833 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13834 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison itself is passed as a fourth operand RTX.  */
13836 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13837 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13844 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comi/ucomi builtin described by D: emit the compare, then
   materialize the flag result as a 0/1 SImode value via a setcc into
   the low QImode subword of TARGET.
   NOTE(review): line-sampled listing -- decls for ARGLIST/TARGET,
   braces, "rtx op2, pat;", the operand-swap branch body, the pat
   null-check/emit lines, and the final operand of the setcc SET are
   missing from view.  */
13847 ix86_expand_sse_comi (d, arglist, target)
13848 const struct builtin_description *d;
13853 tree arg0 = TREE_VALUE (arglist);
13854 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13855 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13856 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13858 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13859 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13860 enum rtx_code comparison = d->comparison;
/* Guard against const0_rtx reaching a vector operand.  */
13862 if (VECTOR_MODE_P (mode0))
13863 op0 = safe_vector_operand (op0, mode0);
13864 if (VECTOR_MODE_P (mode1))
13865 op1 = safe_vector_operand (op1, mode1);
13867 /* Swap operands if we have a comparison that isn't available in
/* (swap branch body missing from view.)  Result register: zero an
   SImode reg, then write only its low byte via STRICT_LOW_PART so the
   upper bits stay zero.  */
13876 target = gen_reg_rtx (SImode);
13877 emit_move_insn (target, const0_rtx);
13878 target = gen_rtx_SUBREG (QImode, target, 0);
13880 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13881 op0 = copy_to_mode_reg (mode0, op0);
13882 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13883 op1 = copy_to_mode_reg (mode1, op1);
/* NOTE(review): OP2 is built but the visible GEN_FCN call takes only
   (op0, op1) -- the comi insn sets the flags; OP2's comparison is used
   by the setcc below.  */
13885 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13886 pat = GEN_FCN (d->icode) (op0, op1);
/* setcc of the flags into the low byte of TARGET (the SET's source
   operand list is truncated in this view).  */
13890 emit_insn (gen_rtx_SET (VOIDmode,
13891 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13892 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode register, not the QImode subreg.  */
13896 return SUBREG_REG (target);
13899 /* Expand an expression EXP that calls a built-in function,
13900 with result going to TARGET if that's convenient
13901 (and in mode MODE if that's convenient).
13902 SUBTARGET may be used as the target for computing one of EXP's operands.
13903 IGNORE is nonzero if the value is to be ignored. */
13906 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13909 rtx subtarget ATTRIBUTE_UNUSED;
13910 enum machine_mode mode ATTRIBUTE_UNUSED;
13911 int ignore ATTRIBUTE_UNUSED;
13913 const struct builtin_description *d;
13915 enum insn_code icode;
13916 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13917 tree arglist = TREE_OPERAND (exp, 1);
13918 tree arg0, arg1, arg2;
13919 rtx op0, op1, op2, pat;
13920 enum machine_mode tmode, mode0, mode1, mode2;
13921 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13925 case IX86_BUILTIN_EMMS:
13926 emit_insn (gen_emms ());
13929 case IX86_BUILTIN_SFENCE:
13930 emit_insn (gen_sfence ());
13933 case IX86_BUILTIN_PEXTRW:
13934 case IX86_BUILTIN_PEXTRW128:
13935 icode = (fcode == IX86_BUILTIN_PEXTRW
13936 ? CODE_FOR_mmx_pextrw
13937 : CODE_FOR_sse2_pextrw);
13938 arg0 = TREE_VALUE (arglist);
13939 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13940 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13941 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13942 tmode = insn_data[icode].operand[0].mode;
13943 mode0 = insn_data[icode].operand[1].mode;
13944 mode1 = insn_data[icode].operand[2].mode;
13946 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13947 op0 = copy_to_mode_reg (mode0, op0);
13948 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13950 /* @@@ better error message */
13951 error ("selector must be an immediate");
13952 return gen_reg_rtx (tmode);
13955 || GET_MODE (target) != tmode
13956 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13957 target = gen_reg_rtx (tmode);
13958 pat = GEN_FCN (icode) (target, op0, op1);
13964 case IX86_BUILTIN_PINSRW:
13965 case IX86_BUILTIN_PINSRW128:
13966 icode = (fcode == IX86_BUILTIN_PINSRW
13967 ? CODE_FOR_mmx_pinsrw
13968 : CODE_FOR_sse2_pinsrw);
13969 arg0 = TREE_VALUE (arglist);
13970 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13971 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13972 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13973 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13974 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13975 tmode = insn_data[icode].operand[0].mode;
13976 mode0 = insn_data[icode].operand[1].mode;
13977 mode1 = insn_data[icode].operand[2].mode;
13978 mode2 = insn_data[icode].operand[3].mode;
13980 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13981 op0 = copy_to_mode_reg (mode0, op0);
13982 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13983 op1 = copy_to_mode_reg (mode1, op1);
13984 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13986 /* @@@ better error message */
13987 error ("selector must be an immediate");
13991 || GET_MODE (target) != tmode
13992 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13993 target = gen_reg_rtx (tmode);
13994 pat = GEN_FCN (icode) (target, op0, op1, op2);
14000 case IX86_BUILTIN_MASKMOVQ:
14001 case IX86_BUILTIN_MASKMOVDQU:
14002 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14003 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14004 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14005 : CODE_FOR_sse2_maskmovdqu));
14006 /* Note the arg order is different from the operand order. */
14007 arg1 = TREE_VALUE (arglist);
14008 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14009 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14010 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14011 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14012 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14013 mode0 = insn_data[icode].operand[0].mode;
14014 mode1 = insn_data[icode].operand[1].mode;
14015 mode2 = insn_data[icode].operand[2].mode;
14017 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14018 op0 = copy_to_mode_reg (mode0, op0);
14019 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14020 op1 = copy_to_mode_reg (mode1, op1);
14021 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14022 op2 = copy_to_mode_reg (mode2, op2);
14023 pat = GEN_FCN (icode) (op0, op1, op2);
14029 case IX86_BUILTIN_SQRTSS:
14030 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14031 case IX86_BUILTIN_RSQRTSS:
14032 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14033 case IX86_BUILTIN_RCPSS:
14034 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14036 case IX86_BUILTIN_LOADAPS:
14037 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14039 case IX86_BUILTIN_LOADUPS:
14040 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14042 case IX86_BUILTIN_STOREAPS:
14043 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14045 case IX86_BUILTIN_STOREUPS:
14046 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14048 case IX86_BUILTIN_LOADSS:
14049 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14051 case IX86_BUILTIN_STORESS:
14052 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14054 case IX86_BUILTIN_LOADHPS:
14055 case IX86_BUILTIN_LOADLPS:
14056 case IX86_BUILTIN_LOADHPD:
14057 case IX86_BUILTIN_LOADLPD:
14058 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14059 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14060 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14061 : CODE_FOR_sse2_movlpd);
14062 arg0 = TREE_VALUE (arglist);
14063 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14064 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14065 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14066 tmode = insn_data[icode].operand[0].mode;
14067 mode0 = insn_data[icode].operand[1].mode;
14068 mode1 = insn_data[icode].operand[2].mode;
14070 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14071 op0 = copy_to_mode_reg (mode0, op0);
14072 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14074 || GET_MODE (target) != tmode
14075 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14076 target = gen_reg_rtx (tmode);
14077 pat = GEN_FCN (icode) (target, op0, op1);
14083 case IX86_BUILTIN_STOREHPS:
14084 case IX86_BUILTIN_STORELPS:
14085 case IX86_BUILTIN_STOREHPD:
14086 case IX86_BUILTIN_STORELPD:
14087 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14088 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14089 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14090 : CODE_FOR_sse2_movlpd);
14091 arg0 = TREE_VALUE (arglist);
14092 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14093 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14094 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14095 mode0 = insn_data[icode].operand[1].mode;
14096 mode1 = insn_data[icode].operand[2].mode;
14098 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14099 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14100 op1 = copy_to_mode_reg (mode1, op1);
14102 pat = GEN_FCN (icode) (op0, op0, op1);
14108 case IX86_BUILTIN_MOVNTPS:
14109 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14110 case IX86_BUILTIN_MOVNTQ:
14111 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14113 case IX86_BUILTIN_LDMXCSR:
14114 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14115 target = assign_386_stack_local (SImode, 0);
14116 emit_move_insn (target, op0);
14117 emit_insn (gen_ldmxcsr (target));
14120 case IX86_BUILTIN_STMXCSR:
14121 target = assign_386_stack_local (SImode, 0);
14122 emit_insn (gen_stmxcsr (target));
14123 return copy_to_mode_reg (SImode, target);
14125 case IX86_BUILTIN_SHUFPS:
14126 case IX86_BUILTIN_SHUFPD:
14127 icode = (fcode == IX86_BUILTIN_SHUFPS
14128 ? CODE_FOR_sse_shufps
14129 : CODE_FOR_sse2_shufpd);
14130 arg0 = TREE_VALUE (arglist);
14131 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14132 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14133 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14134 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14135 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14136 tmode = insn_data[icode].operand[0].mode;
14137 mode0 = insn_data[icode].operand[1].mode;
14138 mode1 = insn_data[icode].operand[2].mode;
14139 mode2 = insn_data[icode].operand[3].mode;
14141 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14142 op0 = copy_to_mode_reg (mode0, op0);
14143 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14144 op1 = copy_to_mode_reg (mode1, op1);
14145 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14147 /* @@@ better error message */
14148 error ("mask must be an immediate");
14149 return gen_reg_rtx (tmode);
14152 || GET_MODE (target) != tmode
14153 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14154 target = gen_reg_rtx (tmode);
14155 pat = GEN_FCN (icode) (target, op0, op1, op2);
14161 case IX86_BUILTIN_PSHUFW:
14162 case IX86_BUILTIN_PSHUFD:
14163 case IX86_BUILTIN_PSHUFHW:
14164 case IX86_BUILTIN_PSHUFLW:
14165 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14166 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14167 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14168 : CODE_FOR_mmx_pshufw);
14169 arg0 = TREE_VALUE (arglist);
14170 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14171 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14172 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14173 tmode = insn_data[icode].operand[0].mode;
14174 mode1 = insn_data[icode].operand[1].mode;
14175 mode2 = insn_data[icode].operand[2].mode;
14177 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14178 op0 = copy_to_mode_reg (mode1, op0);
14179 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14181 /* @@@ better error message */
14182 error ("mask must be an immediate");
14186 || GET_MODE (target) != tmode
14187 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14188 target = gen_reg_rtx (tmode);
14189 pat = GEN_FCN (icode) (target, op0, op1);
14195 case IX86_BUILTIN_PSLLDQI128:
14196 case IX86_BUILTIN_PSRLDQI128:
14197 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14198 : CODE_FOR_sse2_lshrti3);
14199 arg0 = TREE_VALUE (arglist);
14200 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14201 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14202 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14203 tmode = insn_data[icode].operand[0].mode;
14204 mode1 = insn_data[icode].operand[1].mode;
14205 mode2 = insn_data[icode].operand[2].mode;
14207 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14209 op0 = copy_to_reg (op0);
14210 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14212 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14214 error ("shift must be an immediate");
14217 target = gen_reg_rtx (V2DImode);
14218 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14224 case IX86_BUILTIN_FEMMS:
14225 emit_insn (gen_femms ());
14228 case IX86_BUILTIN_PAVGUSB:
14229 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14231 case IX86_BUILTIN_PF2ID:
14232 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14234 case IX86_BUILTIN_PFACC:
14235 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14237 case IX86_BUILTIN_PFADD:
14238 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14240 case IX86_BUILTIN_PFCMPEQ:
14241 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14243 case IX86_BUILTIN_PFCMPGE:
14244 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14246 case IX86_BUILTIN_PFCMPGT:
14247 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14249 case IX86_BUILTIN_PFMAX:
14250 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14252 case IX86_BUILTIN_PFMIN:
14253 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14255 case IX86_BUILTIN_PFMUL:
14256 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14258 case IX86_BUILTIN_PFRCP:
14259 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14261 case IX86_BUILTIN_PFRCPIT1:
14262 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14264 case IX86_BUILTIN_PFRCPIT2:
14265 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14267 case IX86_BUILTIN_PFRSQIT1:
14268 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14270 case IX86_BUILTIN_PFRSQRT:
14271 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14273 case IX86_BUILTIN_PFSUB:
14274 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14276 case IX86_BUILTIN_PFSUBR:
14277 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14279 case IX86_BUILTIN_PI2FD:
14280 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14282 case IX86_BUILTIN_PMULHRW:
14283 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14285 case IX86_BUILTIN_PF2IW:
14286 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14288 case IX86_BUILTIN_PFNACC:
14289 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14291 case IX86_BUILTIN_PFPNACC:
14292 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14294 case IX86_BUILTIN_PI2FW:
14295 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14297 case IX86_BUILTIN_PSWAPDSI:
14298 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14300 case IX86_BUILTIN_PSWAPDSF:
14301 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14303 case IX86_BUILTIN_SSE_ZERO:
14304 target = gen_reg_rtx (V4SFmode);
14305 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14308 case IX86_BUILTIN_MMX_ZERO:
14309 target = gen_reg_rtx (DImode);
14310 emit_insn (gen_mmx_clrdi (target));
14313 case IX86_BUILTIN_CLRTI:
14314 target = gen_reg_rtx (V2DImode);
14315 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14319 case IX86_BUILTIN_SQRTSD:
14320 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14321 case IX86_BUILTIN_LOADAPD:
14322 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14323 case IX86_BUILTIN_LOADUPD:
14324 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14326 case IX86_BUILTIN_STOREAPD:
14327 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14328 case IX86_BUILTIN_STOREUPD:
14329 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14331 case IX86_BUILTIN_LOADSD:
14332 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14334 case IX86_BUILTIN_STORESD:
14335 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14337 case IX86_BUILTIN_SETPD1:
14338 target = assign_386_stack_local (DFmode, 0);
14339 arg0 = TREE_VALUE (arglist);
14340 emit_move_insn (adjust_address (target, DFmode, 0),
14341 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14342 op0 = gen_reg_rtx (V2DFmode);
14343 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14344 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14347 case IX86_BUILTIN_SETPD:
14348 target = assign_386_stack_local (V2DFmode, 0);
14349 arg0 = TREE_VALUE (arglist);
14350 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14351 emit_move_insn (adjust_address (target, DFmode, 0),
14352 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14353 emit_move_insn (adjust_address (target, DFmode, 8),
14354 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14355 op0 = gen_reg_rtx (V2DFmode);
14356 emit_insn (gen_sse2_movapd (op0, target));
14359 case IX86_BUILTIN_LOADRPD:
14360 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14361 gen_reg_rtx (V2DFmode), 1);
14362 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14365 case IX86_BUILTIN_LOADPD1:
14366 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14367 gen_reg_rtx (V2DFmode), 1);
14368 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14371 case IX86_BUILTIN_STOREPD1:
14372 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14373 case IX86_BUILTIN_STORERPD:
14374 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14376 case IX86_BUILTIN_CLRPD:
14377 target = gen_reg_rtx (V2DFmode);
14378 emit_insn (gen_sse_clrv2df (target));
14381 case IX86_BUILTIN_MFENCE:
14382 emit_insn (gen_sse2_mfence ());
14384 case IX86_BUILTIN_LFENCE:
14385 emit_insn (gen_sse2_lfence ());
14388 case IX86_BUILTIN_CLFLUSH:
14389 arg0 = TREE_VALUE (arglist);
14390 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14391 icode = CODE_FOR_sse2_clflush;
14392 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14393 op0 = copy_to_mode_reg (Pmode, op0);
14395 emit_insn (gen_sse2_clflush (op0));
14398 case IX86_BUILTIN_MOVNTPD:
14399 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14400 case IX86_BUILTIN_MOVNTDQ:
14401 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14402 case IX86_BUILTIN_MOVNTI:
14403 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14405 case IX86_BUILTIN_LOADDQA:
14406 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14407 case IX86_BUILTIN_LOADDQU:
14408 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14409 case IX86_BUILTIN_LOADD:
14410 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14412 case IX86_BUILTIN_STOREDQA:
14413 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14414 case IX86_BUILTIN_STOREDQU:
14415 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14416 case IX86_BUILTIN_STORED:
14417 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14423 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14424 if (d->code == fcode)
14426 /* Compares are treated specially. */
14427 if (d->icode == CODE_FOR_maskcmpv4sf3
14428 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14429 || d->icode == CODE_FOR_maskncmpv4sf3
14430 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14431 || d->icode == CODE_FOR_maskcmpv2df3
14432 || d->icode == CODE_FOR_vmmaskcmpv2df3
14433 || d->icode == CODE_FOR_maskncmpv2df3
14434 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14435 return ix86_expand_sse_compare (d, arglist, target);
14437 return ix86_expand_binop_builtin (d->icode, arglist, target);
14440 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14441 if (d->code == fcode)
14442 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14444 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14445 if (d->code == fcode)
14446 return ix86_expand_sse_comi (d, arglist, target);
14448 /* @@@ Should really do something sensible here. */
14452 /* Store OPERAND to the memory after reload is completed. This means
14453 that we can't easily use assign_stack_local. */
/* NOTE(review): this excerpt is missing interleaved source lines (return
   type, braces, local declarations and some emit calls); comments below
   describe only the code that is visible here.  */
14455 ix86_force_to_memory (mode, operand)
14456 enum machine_mode mode;
/* Must only be called after reload has completed (see comment above).  */
14460 if (!reload_completed)
/* 64-bit with a red zone: store below the stack pointer instead of
   pushing, so the stack pointer is not modified.  */
14462 if (TARGET_64BIT && TARGET_RED_ZONE)
14464 result = gen_rtx_MEM (mode,
14465 gen_rtx_PLUS (Pmode,
14467 GEN_INT (-RED_ZONE_SIZE)));
14468 emit_move_insn (result, operand);
/* 64-bit without a red zone: emit an explicit DImode push via a
   PRE_DEC of the stack pointer.  */
14470 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14476 operand = gen_lowpart (DImode, operand);
14480 gen_rtx_SET (VOIDmode,
14481 gen_rtx_MEM (DImode,
14482 gen_rtx_PRE_DEC (DImode,
14483 stack_pointer_rtx)),
14489 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode values are split into two SImode pushes.  */
14498 split_di (&operand, 1, operands, operands + 1);
14500 gen_rtx_SET (VOIDmode,
14501 gen_rtx_MEM (SImode,
14502 gen_rtx_PRE_DEC (Pmode,
14503 stack_pointer_rtx)),
14506 gen_rtx_SET (VOIDmode,
14507 gen_rtx_MEM (SImode,
14508 gen_rtx_PRE_DEC (Pmode,
14509 stack_pointer_rtx)),
14514 /* It is better to store HImodes as SImodes. */
14515 if (!TARGET_PARTIAL_REG_STALL)
14516 operand = gen_lowpart (SImode, operand);
14520 gen_rtx_SET (VOIDmode,
14521 gen_rtx_MEM (GET_MODE (operand),
14522 gen_rtx_PRE_DEC (SImode,
14523 stack_pointer_rtx)),
/* After the push, the stored value lives at the new top of stack.  */
14529 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14534 /* Free operand from the memory. */
/* Releases the stack space obtained by ix86_force_to_memory.  Nothing
   to do on 64-bit red-zone targets, where the value was stored below
   the stack pointer without a push.  NOTE(review): this excerpt is
   missing lines (return type, braces, the size assignments).  */
14536 ix86_free_from_memory (mode)
14537 enum machine_mode mode;
14539 if (!TARGET_64BIT || !TARGET_RED_ZONE)
/* Select how many bytes were pushed for MODE; the actual assignments
   are not visible in this excerpt — presumably 8 for DImode or 64-bit,
   2 for HImode with partial-reg stalls, else 4.  TODO confirm.  */
14543 if (mode == DImode || TARGET_64BIT)
14545 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14549 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14550 to pop or add instruction if registers are available. */
14551 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14552 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14557 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14558 QImode must go into class Q_REGS.
14559 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14560 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): this excerpt is missing lines (return type, braces and
   several return statements); comments annotate only the visible
   conditions.  */
14562 ix86_preferred_reload_class (x, class)
14564 enum reg_class class;
/* Non-zero vector constants cannot be loaded directly into registers.  */
14566 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14568 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14570 /* SSE can't load any constant directly yet. */
14571 if (SSE_CLASS_P (class))
14573 /* Floats can load 0 and 1. */
14574 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14576 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14577 if (MAYBE_SSE_CLASS_P (class))
14578 return (reg_class_subset_p (class, GENERAL_REGS)
14579 ? GENERAL_REGS : FLOAT_REGS);
14583 /* General regs can load everything. */
14584 if (reg_class_subset_p (class, GENERAL_REGS))
14585 return GENERAL_REGS;
14586 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14587 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot hold constants at all.  */
14590 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values need a byte-addressable register class (Q_REGS).  */
14592 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14597 /* If we are copying between general and FP registers, we need a memory
14598 location. The same is true for SSE and MMX registers.
14600 The macro can't work reliably when one of the CLASSES is class containing
14601 registers from multiple units (SSE, MMX, integer). We avoid this by never
14602 combining those units in single alternative in the machine description.
14603 Ensure that this constraint holds to avoid unexpected surprises.
14605 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14606 enforce these sanity checks. */
/* NOTE(review): this excerpt is missing lines (return type, the STRICT
   parameter declaration, braces and the sanity-check failure path).  */
14608 ix86_secondary_memory_needed (class1, class2, mode, strict)
14609 enum reg_class class1, class2;
14610 enum machine_mode mode;
/* Sanity check: each class must be unambiguously FP, SSE or MMX (or
   none of them) — mixed-unit classes are unsupported, per the comment
   above.  */
14613 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14614 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14615 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14616 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14617 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14618 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory is required for x87<->non-x87 copies always; for SSE/MMX<->
   other-unit copies it is required unless the mode is SImode (or DImode
   on 64-bit) and direct inter-unit moves are enabled or we optimize for
   size.  */
14625 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14626 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14627 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14628 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14629 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14631 /* Return the cost of moving data from a register in class CLASS1 to
14632 one in class CLASS2.
14634 It is not required that the cost always equal 2 when FROM is the same as TO;
14635 on some machines it is expensive to move between registers if they are not
14636 general registers. */
/* NOTE(review): this excerpt is missing lines (return type, braces, the
   `cost' declaration/initialization and some return statements).  */
14638 ix86_register_move_cost (mode, class1, class2)
14639 enum machine_mode mode;
14640 enum reg_class class1, class2;
14642 /* In case we require secondary memory, compute cost of the store followed
14643 by load. In order to avoid bad register allocation choices, we need
14644 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14646 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* Use the worse of load/store cost for each side of the round trip.  */
14650 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14651 MEMORY_MOVE_COST (mode, class1, 1));
14652 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14653 MEMORY_MOVE_COST (mode, class2, 1));
14655 /* In case of copying from general_purpose_register we may emit multiple
14656 stores followed by single load causing memory size mismatch stall.
14657 Count this as arbitrarily high cost of 20. */
14658 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14661 /* In the case of FP/MMX moves, the registers actually overlap, and we
14662 have to switch modes in order to treat them differently. */
14663 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14664 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14670 /* Moves between SSE/MMX and integer unit are expensive. */
14671 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14672 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14673 return ix86_cost->mmxsse_to_integer;
/* Within-unit moves use the per-unit move costs from the active cost
   table.  */
14674 if (MAYBE_FLOAT_CLASS_P (class1))
14675 return ix86_cost->fp_move;
14676 if (MAYBE_SSE_CLASS_P (class1))
14677 return ix86_cost->sse_move;
14678 if (MAYBE_MMX_CLASS_P (class1))
14679 return ix86_cost->mmx_move;
14683 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): this excerpt is missing lines (return type, braces and
   some `return 0' statements for the rejecting branches).  */
14685 ix86_hard_regno_mode_ok (regno, mode)
14687 enum machine_mode mode;
14689 /* Flags and only flags can only hold CCmode values. */
14690 if (CC_REGNO_P (regno))
14691 return GET_MODE_CLASS (mode) == MODE_CC;
/* Conversely, CC / random / partial-int modes are rejected for every
   non-flags register (the rejecting return is not visible here).  */
14692 if (GET_MODE_CLASS (mode) == MODE_CC
14693 || GET_MODE_CLASS (mode) == MODE_RANDOM
14694 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* Each special-purpose register file accepts only its valid modes.  */
14696 if (FP_REGNO_P (regno))
14697 return VALID_FP_MODE_P (mode);
14698 if (SSE_REGNO_P (regno))
14699 return VALID_SSE_REG_MODE (mode);
14700 if (MMX_REGNO_P (regno))
14701 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14702 /* We handle both integer and floats in the general purpose registers.
14703 In future we should be able to handle vector modes as well. */
14704 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14706 /* Take care for QImode values - they can be in non-QI regs, but then
14707 they do cause partial register stalls. */
14708 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* QImode in a non-QI register: allowed during/after reload, or when the
   target has no partial-register stall penalty.  */
14710 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14713 /* Return the cost of moving data of mode M between a
14714 register and memory. A value of 2 is the default; this cost is
14715 relative to those in `REGISTER_MOVE_COST'.
14717 If moving between registers and memory is more expensive than
14718 between two registers, you should define this macro to express the
14721 Model also increased moving costs of QImode registers in non
/* NOTE(review): this excerpt is missing lines (return type, braces, the
   `index' computations and the switch case labels); comments annotate
   only the visible fragments.  */
14725 ix86_memory_move_cost (mode, class, in)
14726 enum machine_mode mode;
14727 enum reg_class class;
/* x87 loads/stores are table-driven by a size index (computation of
   `index' is not visible in this excerpt).  */
14730 if (FLOAT_CLASS_P (class))
14748 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* Likewise for SSE and MMX, indexed by GET_MODE_SIZE.  */
14750 if (SSE_CLASS_P (class))
14753 switch (GET_MODE_SIZE (mode))
14767 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14769 if (MMX_CLASS_P (class))
14772 switch (GET_MODE_SIZE (mode))
14783 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: cost depends on the operand size.  */
14785 switch (GET_MODE_SIZE (mode))
/* Byte loads outside Q_REGS use movzbl; byte stores outside Q_REGS get
   an extra penalty of 4.  */
14789 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14790 : ix86_cost->movzbl_load);
14792 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14793 : ix86_cost->int_store[0] + 4);
14796 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14798 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14799 if (mode == TFmode)
/* Wide modes: scale the word-sized cost by the number of words.  */
14801 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14802 * ((int) GET_MODE_SIZE (mode)
14803 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14807 /* Compute a (partial) cost for rtx X. Return true if the complete
14808 cost has been computed, and false if subexpressions should be
14809 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): this excerpt is missing many lines of the function (the
   `case' labels of the big switch, braces, several assignments and the
   `return'/`break' statements); the comments below annotate only the
   visible fragments and the cases they appear to belong to.  */
14812 ix86_rtx_costs (x, code, outer_code, total)
14814 int code, outer_code;
14817 enum machine_mode mode = GET_MODE (x);
/* Integer/symbolic constants: 64-bit constants that do not fit a
   sign/zero-extended immediate, and PIC symbolic references, cost
   extra.  */
14825 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14827 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14829 else if (flag_pic && SYMBOLIC_CONST (x))
14836 if (mode == VOIDmode)
/* FP constants: those loadable by a dedicated 387 insn (fldz, fld1,
   ...) are cheap; anything else comes from the constant pool.  */
14839 switch (standard_80387_constant_p (x))
14844 default: /* Other constants */
14849 /* Start with (MEM (SYMBOL_REF)), since that's where
14850 it'll probably end up. Add a penalty for size. */
14851 *total = (COSTS_N_INSNS (1)
14853 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14859 /* The zero extensions is often completely free on x86_64, so make
14860 it as cheap as possible. */
14861 if (TARGET_64BIT && mode == DImode
14862 && GET_MODE (XEXP (x, 0)) == SImode)
14864 else if (TARGET_ZERO_EXTEND_WITH_AND)
14865 *total = COSTS_N_INSNS (ix86_cost->add);
14867 *total = COSTS_N_INSNS (ix86_cost->movzx);
14871 *total = COSTS_N_INSNS (ix86_cost->movsx);
/* Shifts: a shift by 1 costs an add; a shift by 2 or 3 may be done via
   lea when that is at least as cheap.  */
14875 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14876 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14878 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14881 *total = COSTS_N_INSNS (ix86_cost->add);
14884 if ((value == 2 || value == 3)
14885 && !TARGET_DECOMPOSE_LEA
14886 && ix86_cost->lea <= ix86_cost->shift_const)
14888 *total = COSTS_N_INSNS (ix86_cost->lea);
/* DImode shifts on 32-bit targets are synthesized from SImode ops, so
   they cost roughly double (more for variable counts).  */
14898 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14900 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14902 if (INTVAL (XEXP (x, 1)) > 32)
14903 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14905 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14909 if (GET_CODE (XEXP (x, 1)) == AND)
14910 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14912 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14917 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14918 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14920 *total = COSTS_N_INSNS (ix86_cost->shift_var);
/* Multiply: for a constant multiplier the cost grows with the number
   of set bits; otherwise an arbitrary 7 bits is assumed.  */
14925 if (FLOAT_MODE_P (mode))
14926 *total = COSTS_N_INSNS (ix86_cost->fmul);
14927 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14929 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14932 for (nbits = 0; value != 0; value >>= 1)
14935 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14936 + nbits * ix86_cost->mult_bit);
14940 /* This is arbitrary */
14941 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14942 + 7 * ix86_cost->mult_bit);
14950 if (FLOAT_MODE_P (mode))
14951 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14953 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
/* PLUS: shapes matching a single lea (base + index*scale + disp) are
   costed as one lea plus the operand costs.  */
14957 if (FLOAT_MODE_P (mode))
14958 *total = COSTS_N_INSNS (ix86_cost->fadd);
14959 else if (!TARGET_DECOMPOSE_LEA
14960 && GET_MODE_CLASS (mode) == MODE_INT
14961 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14963 if (GET_CODE (XEXP (x, 0)) == PLUS
14964 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14965 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14966 && CONSTANT_P (XEXP (x, 1)))
14968 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14969 if (val == 2 || val == 4 || val == 8)
14971 *total = COSTS_N_INSNS (ix86_cost->lea);
14972 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14973 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14975 *total += rtx_cost (XEXP (x, 1), outer_code);
14979 else if (GET_CODE (XEXP (x, 0)) == MULT
14980 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14982 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14983 if (val == 2 || val == 4 || val == 8)
14985 *total = COSTS_N_INSNS (ix86_cost->lea);
14986 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14987 *total += rtx_cost (XEXP (x, 1), outer_code);
14991 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14993 *total = COSTS_N_INSNS (ix86_cost->lea);
14994 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14995 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14996 *total += rtx_cost (XEXP (x, 1), outer_code);
15003 if (FLOAT_MODE_P (mode))
15005 *total = COSTS_N_INSNS (ix86_cost->fadd);
/* DImode logical ops on 32-bit need two SImode instructions; operands
   not already DImode count double (the shift-by-bool trick).  */
15013 if (!TARGET_64BIT && mode == DImode)
15015 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15016 + (rtx_cost (XEXP (x, 0), outer_code)
15017 << (GET_MODE (XEXP (x, 0)) != DImode))
15018 + (rtx_cost (XEXP (x, 1), outer_code)
15019 << (GET_MODE (XEXP (x, 1)) != DImode)));
15025 if (FLOAT_MODE_P (mode))
15027 *total = COSTS_N_INSNS (ix86_cost->fchs);
15033 if (!TARGET_64BIT && mode == DImode)
15034 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15036 *total = COSTS_N_INSNS (ix86_cost->add);
15040 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15045 if (FLOAT_MODE_P (mode))
15046 *total = COSTS_N_INSNS (ix86_cost->fabs);
15050 if (FLOAT_MODE_P (mode))
15051 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15059 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit the init-section fragment for one static constructor on SVR3
   targets: push the constructor's address.  NOTE(review): the return
   type, the SYMBOL parameter declaration, braces and the matching
   #endif are missing from this excerpt.  */
15061 ix86_svr3_asm_out_constructor (symbol, priority)
15063 int priority ATTRIBUTE_UNUSED;
15066 fputs ("\tpushl $", asm_out_file);
15067 assemble_name (asm_out_file, XSTR (symbol, 0));
15068 fputc ('\n', asm_out_file);
/* Counter used to build unique Mach-O stub-local labels (L<n>$lz,
   LPC$<n>).  */
15074 static int current_machopic_label_num;
15076 /* Given a symbol name and its associated stub, write out the
15077 definition of the stub. */
/* NOTE(review): this excerpt is missing lines (return type, the FILE
   parameter declaration, braces and the conditionals — presumably
   MACHOPIC_PURE — selecting between the PIC and non-PIC variants).  */
15080 machopic_output_stub (file, symb, stub)
15082 const char *symb, *stub;
15084 unsigned int length;
15085 char *binder_name, *symbol_name, lazy_ptr_name[32];
15086 int label = ++current_machopic_label_num;
15088 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15089 symb = (*targetm.strip_name_encoding) (symb);
15091 length = strlen (stub);
15092 binder_name = alloca (length + 32);
15093 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15095 length = strlen (symb);
15096 symbol_name = alloca (length + 32);
15097 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15099 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Select the stub section (the guarding condition is not visible in
   this excerpt).  */
15102 machopic_picsymbol_stub_section ();
15104 machopic_symbol_stub_section ();
15106 fprintf (file, "%s:\n", stub);
15107 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC variant: obtain the current address via call/pop, then jump
   through the lazy pointer relative to that local label.  */
15111 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15112 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15113 fprintf (file, "\tjmp %%edx\n");
/* Non-PIC variant: jump indirectly through the lazy pointer.  */
15116 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* The binder pushes the lazy pointer's address and enters dyld's
   stub-binding helper.  */
15118 fprintf (file, "%s:\n", binder_name);
15122 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15123 fprintf (file, "\tpushl %%eax\n");
15126 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15128 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* The lazy symbol pointer initially resolves to the binder, so the
   first call goes through dyld.  */
15130 machopic_lazy_symbol_ptr_section ();
15131 fprintf (file, "%s:\n", lazy_ptr_name);
15132 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15133 fprintf (file, "\t.long %s\n", binder_name);
15135 #endif /* TARGET_MACHO */
15137 /* Order the registers for register allocator. */
/* Fills reg_alloc_order: call-clobbered general regs first, then
   call-saved general regs, then x87/SSE in an order depending on
   whether SSE is used for FP math, then MMX.  NOTE(review): the return
   type, braces and the declarations of `i' and `pos' are missing from
   this excerpt.  */
15140 x86_order_regs_for_local_alloc ()
15145 /* First allocate the local general purpose registers. */
15146 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15147 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15148 reg_alloc_order [pos++] = i;
15150 /* Global general purpose registers. */
15151 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15152 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15153 reg_alloc_order [pos++] = i;
15155 /* x87 registers come first in case we are doing FP math
15157 if (!TARGET_SSE_MATH)
15158 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15159 reg_alloc_order [pos++] = i;
15161 /* SSE registers. */
15162 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15163 reg_alloc_order [pos++] = i;
15164 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15165 reg_alloc_order [pos++] = i;
15167 /* x87 registers. */
15168 if (TARGET_SSE_MATH)
15169 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15170 reg_alloc_order [pos++] = i;
15172 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15173 reg_alloc_order [pos++] = i;
15175 /* Initialize the rest of array as we do not allocate some registers
15177 while (pos < FIRST_PSEUDO_REGISTER)
15178 reg_alloc_order [pos++] = 0;
15181 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15182 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15185 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15186 struct attribute_spec.handler. */
/* NOTE(review): this excerpt is missing lines (return type, the
   NODE/NAME parameter declarations, braces and the final return).  */
15188 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
15191 tree args ATTRIBUTE_UNUSED;
15192 int flags ATTRIBUTE_UNUSED;
15193 bool *no_add_attrs;
/* For a TYPE_DECL, attach the attribute to the declared type itself.  */
15196 if (DECL_P (*node))
15198 if (TREE_CODE (*node) == TYPE_DECL)
15199 type = &TREE_TYPE (*node);
/* Only struct (RECORD_TYPE) and union types may carry these
   attributes; warn and drop it otherwise.  */
15204 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15205 || TREE_CODE (*type) == UNION_TYPE)))
15207 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15208 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on one type.  */
15211 else if ((is_attribute_p ("ms_struct", name)
15212 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15213 || ((is_attribute_p ("gcc_struct", name)
15214 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15216 warning ("`%s' incompatible attribute ignored",
15217 IDENTIFIER_POINTER (name));
15218 *no_add_attrs = true;
/* Return nonzero when RECORD_TYPE should use the MS bitfield layout:
   either the target default requests it and "gcc_struct" does not
   override, or the type carries the "ms_struct" attribute.
   NOTE(review): the return type and parameter declaration lines are
   missing from this excerpt.  */
15225 ix86_ms_bitfield_layout_p (record_type)
15228 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15229 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15230 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15233 /* Returns an expression indicating where the this parameter is
15234 located on entry to the FUNCTION. */
/* NOTE(review): this excerpt is missing lines (return type, braces and
   the TARGET_64BIT conditional guarding the first branch).  */
15237 x86_this_parameter (function)
15240 tree type = TREE_TYPE (function);
/* 64-bit: `this' arrives in the first integer parameter register, or
   the second when an aggregate return value is passed by hidden
   reference in the first.  */
15244 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15245 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
/* 32-bit regparm functions pass `this' in a register too.  */
15248 if (ix86_fntype_regparm (type) > 0)
15252 parm = TYPE_ARG_TYPES (type);
15253 /* Figure out whether or not the function has a variable number of
15255 for (; parm; parm = TREE_CHAIN (parm))
15256 if (TREE_VALUE (parm) == void_type_node)
15258 /* If not, the this parameter is in %eax. */
15260 return gen_rtx_REG (SImode, 0);
/* Otherwise `this' lives on the stack: above the return address, and
   above the hidden aggregate-return pointer when there is one.  */
15263 if (aggregate_value_p (TREE_TYPE (type)))
15264 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15266 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15269 /* Determine whether x86_output_mi_thunk can succeed. */
/* NOTE(review): this excerpt is missing lines (return type, the
   FUNCTION parameter declaration, braces and every return statement);
   the existing comments describe the decisions made on the missing
   lines.  */
15272 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
15273 tree thunk ATTRIBUTE_UNUSED;
15274 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
15275 HOST_WIDE_INT vcall_offset;
15278 /* 64-bit can handle anything. */
15282 /* For 32-bit, everything's fine if we have one free register. */
15283 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15286 /* Need a free register for vcall_offset. */
15290 /* Need a free register for GOT references. */
15291 if (flag_pic && !(*targetm.binds_local_p) (function))
15294 /* Otherwise ok. */
15298 /* Output the assembler code for a thunk function. THUNK_DECL is the
15299 declaration for the thunk function itself, FUNCTION is the decl for
15300 the target function. DELTA is an immediate constant offset to be
15301 added to THIS. If VCALL_OFFSET is nonzero, the word at
15302 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): this excerpt is missing many lines (return type,
   braces, and several TARGET_64BIT / TARGET_MACHO conditionals);
   comments below annotate only the visible fragments.  */
15305 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
15306 FILE *file ATTRIBUTE_UNUSED;
15307 tree thunk ATTRIBUTE_UNUSED;
15308 HOST_WIDE_INT delta;
15309 HOST_WIDE_INT vcall_offset;
15313 rtx this = x86_this_parameter (function);
15316 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15317 pull it in now and let DELTA benefit. */
15320 else if (vcall_offset)
15322 /* Put the this parameter into %eax. */
15324 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15325 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15328 this_reg = NULL_RTX;
15330 /* Adjust the this parameter by a fixed constant. */
15333 xops[0] = GEN_INT (delta);
15334 xops[1] = this_reg ? this_reg : this;
/* On 64-bit, a delta that is not a valid immediate is first
   materialized in scratch register R10.  */
15337 if (!x86_64_general_operand (xops[0], DImode))
15339 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15341 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15345 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15348 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15351 /* Adjust the this parameter by a value stored in the vtable. */
/* Scratch register: R10 on 64-bit, ECX on 32-bit.  */
15355 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15357 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
/* Load the vtable pointer from *this into the scratch register.  */
15359 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15362 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15364 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15366 /* Adjust the this parameter. */
15367 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* 64-bit: a vcall offset too large for a displacement needs a second
   scratch register, R11.  */
15368 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15370 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15371 xops[0] = GEN_INT (vcall_offset);
15373 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15374 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15376 xops[1] = this_reg;
15378 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15380 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15383 /* If necessary, drop THIS back to its stack slot. */
15384 if (this_reg && this_reg != this)
15386 xops[0] = this_reg;
15388 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally tail-call the real target function.  */
15391 xops[0] = DECL_RTL (function);
15394 if (!flag_pic || (*targetm.binds_local_p) (function))
15395 output_asm_insn ("jmp\t%P0", xops);
/* 64-bit PIC: jump indirectly through the GOT entry (GOTPCREL).  */
15398 tmp = XEXP (xops[0], 0);
15399 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
15400 tmp = gen_rtx_CONST (Pmode, tmp);
15401 tmp = gen_rtx_MEM (QImode, tmp);
15403 output_asm_insn ("jmp\t%A0", xops);
15408 if (!flag_pic || (*targetm.binds_local_p) (function))
15409 output_asm_insn ("jmp\t%P0", xops);
/* Darwin: jump via the machopic stub for the target symbol.  */
15414 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15415 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15416 tmp = gen_rtx_MEM (QImode, tmp);
15418 output_asm_insn ("jmp\t%0", xops);
15421 #endif /* TARGET_MACHO */
/* 32-bit PIC (non-Darwin): materialize the GOT pointer in ECX, then
   jump through the function's GOT slot.  */
15423 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15424 output_set_got (tmp);
15427 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15428 output_asm_insn ("jmp\t{*}%1", xops);
/* Cap the alignment of a structure FIELD at 32 bits for double-word
   scalar types, matching the traditional ix86 ABI, unless the target
   aligns doubles naturally (64-bit or -malign-double).
   NOTE(review): the return type, the COMPUTED parameter declaration,
   braces and the fall-through return of COMPUTED are missing from this
   excerpt.  */
15434 x86_field_alignment (field, computed)
15438 enum machine_mode mode;
15439 tree type = TREE_TYPE (field);
15441 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For array fields, the element type's mode is what matters.  */
15443 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15444 ? get_inner_array_type (type) : type);
15445 if (mode == DFmode || mode == DCmode
15446 || GET_MODE_CLASS (mode) == MODE_INT
15447 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15448 return MIN (32, computed);
15452 /* Output assembler code to FILE to increment profiler label # LABELNO
15453 for profiling a function entry. */
/* Emits the mcount call sequence.  Four variants are visible below:
   64-bit PIC, 64-bit non-PIC, 32-bit PIC, and 32-bit non-PIC.
   NOTE(review): the if/else lines selecting among the variants, the
   return type and braces are missing from this excerpt.  */
15455 x86_function_profiler (file, labelno)
15457 int labelno ATTRIBUTE_UNUSED;
/* 64-bit PIC: counter address via RIP-relative lea into %r11, call
   mcount through the GOT.  */
15462 #ifndef NO_PROFILE_COUNTERS
15463 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15465 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
/* 64-bit non-PIC: absolute counter address, direct call.  */
15469 #ifndef NO_PROFILE_COUNTERS
15470 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15472 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC: counter via GOTOFF relative to %ebx, call through GOT.  */
15476 #ifndef NO_PROFILE_COUNTERS
15477 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15478 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15480 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC: absolute counter address, direct call.  */
15484 #ifndef NO_PROFILE_COUNTERS
15485 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15486 PROFILE_COUNT_REGISTER);
15488 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15492 /* Implement machine specific optimizations.
15493 At the moment we implement single transformation: AMD Athlon works faster
15494 when RET is not destination of conditional jump or directly preceded
15495 by other jump instruction. We avoid the penalty by inserting NOP just
15496 before the RET instructions in such cases. */
/* NOTE(review): this excerpt is missing lines (return type, braces and
   the declarations of `e', `ret' and `prev').  */
15498 x86_machine_dependent_reorg (first)
15499 rtx first ATTRIBUTE_UNUSED;
/* Only Athlon/K8 benefit; skip when not optimizing, or when optimizing
   for size (the NOP costs bytes).  */
15503 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
/* Every predecessor of the exit block ends in a return candidate.  */
15505 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15507 basic_block bb = e->src;
15510 bool insert = false;
15512 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
/* Scan back for the nearest active insn or code label before RET.  */
15514 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15515 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
/* A label immediately before RET means some edge jumps straight to the
   return; insert the NOP when a hot non-fallthru predecessor exists.  */
15517 if (prev && GET_CODE (prev) == CODE_LABEL)
15520 for (e = bb->pred; e; e = e->pred_next)
15521 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15522 && !(e->flags & EDGE_FALLTHRU))
/* A conditional jump directly preceding RET also incurs the penalty.  */
15527 prev = prev_active_insn (ret);
15528 if (prev && GET_CODE (prev) == JUMP_INSN
15529 && any_condjump_p (prev))
15531 /* Empty functions get branch misspredict even when the jump destination
15532 is not visible to us. */
15533 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Pad with a NOP so RET is not the jump target / jump successor.  */
15537 emit_insn_before (gen_nop (), ret);
15541 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the extracted operands of INSN for a hard register with number
   >= 4; presumably the (missing) surrounding lines restrict this to
   QImode operands, since regs 4+ used in QImode need a REX prefix in
   64-bit mode — TODO confirm against the full source.  NOTE(review):
   the return type, parameter declaration, braces and the return
   statements are missing from this excerpt.  */
15544 x86_extended_QIreg_mentioned_p (insn)
15548 extract_insn_cached (insn);
15549 for (i = 0; i < recog_data.n_operands; i++)
15550 if (REG_P (recog_data.operand[i])
15551 && REGNO (recog_data.operand[i]) >= 4)
15556 /* Return nonzero when P points to register encoded via REX prefix.
15557 Called via for_each_rtx. */
/* NOTE(review): the return type, the rtx** parameter declaration,
   braces and the REG_P guard on *P are missing from this excerpt.  */
15559 extended_reg_mentioned_1 (p, data)
15561 void *data ATTRIBUTE_UNUSED;
15563 unsigned int regno;
15566 regno = REGNO (*p);
/* Registers in the REX-only ranges (R8-R15 / XMM8-XMM15) need the
   prefix.  */
15567 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15570 /* Return true when INSN mentions register that must be encoded using REX
/* Walks INSN's pattern with for_each_rtx, applying
   extended_reg_mentioned_1 to every sub-rtx; nonzero when any REX
   register is found.  NOTE(review): the return type, parameter
   declaration and braces are missing from this excerpt.  */
15573 x86_extended_reg_mentioned_p (insn)
15576 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15579 /* Generate an unsigned DImode to FP conversion. This is the same code
15580 optabs would emit if we didn't have TFmode patterns. */
/* NOTE(review): the return type, braces and the assignment of `out'
   (presumably from operands[0]) are missing from this excerpt.  */
15583 x86_emit_floatuns (operands)
15586 rtx neglab, donelab, i0, i1, f0, in, out;
15587 enum machine_mode mode;
15590 in = force_reg (DImode, operands[1]);
15591 mode = GET_MODE (out);
15592 neglab = gen_label_rtx ();
15593 donelab = gen_label_rtx ();
15594 i1 = gen_reg_rtx (Pmode);
15595 f0 = gen_reg_rtx (mode);
/* Inputs with the sign bit clear convert correctly as signed.  */
15597 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15599 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15600 emit_jump_insn (gen_jump (donelab));
15603 emit_label (neglab);
/* High-bit-set inputs: halve with a sticky low bit,
   (in >> 1) | (in & 1), convert as signed, then double the result —
   the sticky bit preserves correct rounding.  */
15605 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15606 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15607 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15608 expand_float (f0, i0, 0);
15609 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15611 emit_label (donelab);
15614 #include "gt-i386.h"