1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  The
   final arm maps every remaining mode (e.g. TImode) to the "other"
   slot of the 5-entry cost arrays below.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
71 struct processor_costs size_cost = { /* costs for tunning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
123 /* Processor costs (relative to an add) */
125 struct processor_costs i386_cost = { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
178 struct processor_costs i486_cost = { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
231 struct processor_costs pentium_cost = {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
284 struct processor_costs pentiumpro_cost = {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
337 struct processor_costs k6_cost = {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (2), /* cost of a lea instruction */
340 COSTS_N_INSNS (1), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (3), /* HI */
344 COSTS_N_INSNS (3), /* SI */
345 COSTS_N_INSNS (3), /* DI */
346 COSTS_N_INSNS (3)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (18), /* HI */
350 COSTS_N_INSNS (18), /* SI */
351 COSTS_N_INSNS (18), /* DI */
352 COSTS_N_INSNS (18)}, /* other */
353 COSTS_N_INSNS (2), /* cost of movsx */
354 COSTS_N_INSNS (2), /* cost of movzx */
355 8, /* "large" insn */
357 3, /* cost for loading QImode using movzbl */
358 {4, 5, 4}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {2, 3, 2}, /* cost of storing integer registers */
362 4, /* cost of reg,reg fld/fst */
363 {6, 6, 6}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 4, 4}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
367 2, /* cost of moving MMX register */
368 {2, 2}, /* cost of loading MMX registers
369 in SImode and DImode */
370 {2, 2}, /* cost of storing MMX registers
371 in SImode and DImode */
372 2, /* cost of moving SSE register */
373 {2, 2, 8}, /* cost of loading SSE registers
374 in SImode, DImode and TImode */
375 {2, 2, 8}, /* cost of storing SSE registers
376 in SImode, DImode and TImode */
377 6, /* MMX or SSE register to integer */
378 32, /* size of prefetch block */
379 1, /* number of parallel prefetches */
381 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
382 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
383 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
384 COSTS_N_INSNS (2), /* cost of FABS instruction. */
385 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
386 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
390 struct processor_costs athlon_cost = {
391 COSTS_N_INSNS (1), /* cost of an add instruction */
392 COSTS_N_INSNS (2), /* cost of a lea instruction */
393 COSTS_N_INSNS (1), /* variable shift costs */
394 COSTS_N_INSNS (1), /* constant shift costs */
395 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
396 COSTS_N_INSNS (5), /* HI */
397 COSTS_N_INSNS (5), /* SI */
398 COSTS_N_INSNS (5), /* DI */
399 COSTS_N_INSNS (5)}, /* other */
400 0, /* cost of multiply per each bit set */
401 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
402 COSTS_N_INSNS (26), /* HI */
403 COSTS_N_INSNS (42), /* SI */
404 COSTS_N_INSNS (74), /* DI */
405 COSTS_N_INSNS (74)}, /* other */
406 COSTS_N_INSNS (1), /* cost of movsx */
407 COSTS_N_INSNS (1), /* cost of movzx */
408 8, /* "large" insn */
410 4, /* cost for loading QImode using movzbl */
411 {3, 4, 3}, /* cost of loading integer registers
412 in QImode, HImode and SImode.
413 Relative to reg-reg move (2). */
414 {3, 4, 3}, /* cost of storing integer registers */
415 4, /* cost of reg,reg fld/fst */
416 {4, 4, 12}, /* cost of loading fp registers
417 in SFmode, DFmode and XFmode */
418 {6, 6, 8}, /* cost of storing fp registers
419 in SFmode, DFmode and XFmode */
420 2, /* cost of moving MMX register */
421 {4, 4}, /* cost of loading MMX registers
422 in SImode and DImode */
423 {4, 4}, /* cost of storing MMX registers
424 in SImode and DImode */
425 2, /* cost of moving SSE register */
426 {4, 4, 6}, /* cost of loading SSE registers
427 in SImode, DImode and TImode */
428 {4, 4, 5}, /* cost of storing SSE registers
429 in SImode, DImode and TImode */
430 5, /* MMX or SSE register to integer */
431 64, /* size of prefetch block */
432 6, /* number of parallel prefetches */
434 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
435 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
436 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
437 COSTS_N_INSNS (2), /* cost of FABS instruction. */
438 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
439 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
443 struct processor_costs k8_cost = {
444 COSTS_N_INSNS (1), /* cost of an add instruction */
445 COSTS_N_INSNS (2), /* cost of a lea instruction */
446 COSTS_N_INSNS (1), /* variable shift costs */
447 COSTS_N_INSNS (1), /* constant shift costs */
448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
449 COSTS_N_INSNS (4), /* HI */
450 COSTS_N_INSNS (3), /* SI */
451 COSTS_N_INSNS (4), /* DI */
452 COSTS_N_INSNS (5)}, /* other */
453 0, /* cost of multiply per each bit set */
454 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
455 COSTS_N_INSNS (26), /* HI */
456 COSTS_N_INSNS (42), /* SI */
457 COSTS_N_INSNS (74), /* DI */
458 COSTS_N_INSNS (74)}, /* other */
459 COSTS_N_INSNS (1), /* cost of movsx */
460 COSTS_N_INSNS (1), /* cost of movzx */
461 8, /* "large" insn */
463 4, /* cost for loading QImode using movzbl */
464 {3, 4, 3}, /* cost of loading integer registers
465 in QImode, HImode and SImode.
466 Relative to reg-reg move (2). */
467 {3, 4, 3}, /* cost of storing integer registers */
468 4, /* cost of reg,reg fld/fst */
469 {4, 4, 12}, /* cost of loading fp registers
470 in SFmode, DFmode and XFmode */
471 {6, 6, 8}, /* cost of storing fp registers
472 in SFmode, DFmode and XFmode */
473 2, /* cost of moving MMX register */
474 {3, 3}, /* cost of loading MMX registers
475 in SImode and DImode */
476 {4, 4}, /* cost of storing MMX registers
477 in SImode and DImode */
478 2, /* cost of moving SSE register */
479 {4, 3, 6}, /* cost of loading SSE registers
480 in SImode, DImode and TImode */
481 {4, 4, 5}, /* cost of storing SSE registers
482 in SImode, DImode and TImode */
483 5, /* MMX or SSE register to integer */
484 64, /* size of prefetch block */
485 6, /* number of parallel prefetches */
487 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (2), /* cost of FABS instruction. */
491 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
496 struct processor_costs pentium4_cost = {
497 COSTS_N_INSNS (1), /* cost of an add instruction */
498 COSTS_N_INSNS (3), /* cost of a lea instruction */
499 COSTS_N_INSNS (4), /* variable shift costs */
500 COSTS_N_INSNS (4), /* constant shift costs */
501 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
502 COSTS_N_INSNS (15), /* HI */
503 COSTS_N_INSNS (15), /* SI */
504 COSTS_N_INSNS (15), /* DI */
505 COSTS_N_INSNS (15)}, /* other */
506 0, /* cost of multiply per each bit set */
507 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
508 COSTS_N_INSNS (56), /* HI */
509 COSTS_N_INSNS (56), /* SI */
510 COSTS_N_INSNS (56), /* DI */
511 COSTS_N_INSNS (56)}, /* other */
512 COSTS_N_INSNS (1), /* cost of movsx */
513 COSTS_N_INSNS (1), /* cost of movzx */
514 16, /* "large" insn */
516 2, /* cost for loading QImode using movzbl */
517 {4, 5, 4}, /* cost of loading integer registers
518 in QImode, HImode and SImode.
519 Relative to reg-reg move (2). */
520 {2, 3, 2}, /* cost of storing integer registers */
521 2, /* cost of reg,reg fld/fst */
522 {2, 2, 6}, /* cost of loading fp registers
523 in SFmode, DFmode and XFmode */
524 {4, 4, 6}, /* cost of storing fp registers
525 in SFmode, DFmode and XFmode */
526 2, /* cost of moving MMX register */
527 {2, 2}, /* cost of loading MMX registers
528 in SImode and DImode */
529 {2, 2}, /* cost of storing MMX registers
530 in SImode and DImode */
531 12, /* cost of moving SSE register */
532 {12, 12, 12}, /* cost of loading SSE registers
533 in SImode, DImode and TImode */
534 {2, 2, 8}, /* cost of storing SSE registers
535 in SImode, DImode and TImode */
536 10, /* MMX or SSE register to integer */
537 64, /* size of prefetch block */
538 6, /* number of parallel prefetches */
540 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
541 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
542 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
543 COSTS_N_INSNS (2), /* cost of FABS instruction. */
544 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
545 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
549 struct processor_costs nocona_cost = {
550 COSTS_N_INSNS (1), /* cost of an add instruction */
551 COSTS_N_INSNS (1), /* cost of a lea instruction */
552 COSTS_N_INSNS (1), /* variable shift costs */
553 COSTS_N_INSNS (1), /* constant shift costs */
554 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
555 COSTS_N_INSNS (10), /* HI */
556 COSTS_N_INSNS (10), /* SI */
557 COSTS_N_INSNS (10), /* DI */
558 COSTS_N_INSNS (10)}, /* other */
559 0, /* cost of multiply per each bit set */
560 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
561 COSTS_N_INSNS (66), /* HI */
562 COSTS_N_INSNS (66), /* SI */
563 COSTS_N_INSNS (66), /* DI */
564 COSTS_N_INSNS (66)}, /* other */
565 COSTS_N_INSNS (1), /* cost of movsx */
566 COSTS_N_INSNS (1), /* cost of movzx */
567 16, /* "large" insn */
569 4, /* cost for loading QImode using movzbl */
570 {4, 4, 4}, /* cost of loading integer registers
571 in QImode, HImode and SImode.
572 Relative to reg-reg move (2). */
573 {4, 4, 4}, /* cost of storing integer registers */
574 3, /* cost of reg,reg fld/fst */
575 {12, 12, 12}, /* cost of loading fp registers
576 in SFmode, DFmode and XFmode */
577 {4, 4, 4}, /* cost of storing fp registers
578 in SFmode, DFmode and XFmode */
579 6, /* cost of moving MMX register */
580 {12, 12}, /* cost of loading MMX registers
581 in SImode and DImode */
582 {12, 12}, /* cost of storing MMX registers
583 in SImode and DImode */
584 6, /* cost of moving SSE register */
585 {12, 12, 12}, /* cost of loading SSE registers
586 in SImode, DImode and TImode */
587 {12, 12, 12}, /* cost of storing SSE registers
588 in SImode, DImode and TImode */
589 8, /* MMX or SSE register to integer */
590 128, /* size of prefetch block */
591 8, /* number of parallel prefetches */
593 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
594 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
595 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
596 COSTS_N_INSNS (3), /* cost of FABS instruction. */
597 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
598 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
601 /* Generic64 should produce code tuned for Nocona and K8. */
603 struct processor_costs generic64_cost = {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 /* On all chips taken into consideration lea is 2 cycles and more. With
606 this cost however our current implementation of synth_mult results in
607 use of unnecesary temporary registers causing regression on several
608 SPECfp benchmarks. */
609 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (2)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (26), /* HI */
620 COSTS_N_INSNS (42), /* SI */
621 COSTS_N_INSNS (74), /* DI */
622 COSTS_N_INSNS (74)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {4, 4, 4}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {4, 4, 4}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {12, 12, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {8, 8}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {8, 8}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {8, 8, 8}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {8, 8, 8}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 5, /* MMX or SSE register to integer */
648 64, /* size of prefetch block */
649 6, /* number of parallel prefetches */
650 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
651 is increased to perhaps more appropriate value of 5. */
653 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (8), /* cost of FABS instruction. */
657 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
661 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
663 struct processor_costs generic32_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
666 COSTS_N_INSNS (1), /* variable shift costs */
667 COSTS_N_INSNS (1), /* constant shift costs */
668 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
669 COSTS_N_INSNS (4), /* HI */
670 COSTS_N_INSNS (3), /* SI */
671 COSTS_N_INSNS (4), /* DI */
672 COSTS_N_INSNS (2)}, /* other */
673 0, /* cost of multiply per each bit set */
674 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
675 COSTS_N_INSNS (26), /* HI */
676 COSTS_N_INSNS (42), /* SI */
677 COSTS_N_INSNS (74), /* DI */
678 COSTS_N_INSNS (74)}, /* other */
679 COSTS_N_INSNS (1), /* cost of movsx */
680 COSTS_N_INSNS (1), /* cost of movzx */
681 8, /* "large" insn */
683 4, /* cost for loading QImode using movzbl */
684 {4, 4, 4}, /* cost of loading integer registers
685 in QImode, HImode and SImode.
686 Relative to reg-reg move (2). */
687 {4, 4, 4}, /* cost of storing integer registers */
688 4, /* cost of reg,reg fld/fst */
689 {12, 12, 12}, /* cost of loading fp registers
690 in SFmode, DFmode and XFmode */
691 {6, 6, 8}, /* cost of storing fp registers
692 in SFmode, DFmode and XFmode */
693 2, /* cost of moving MMX register */
694 {8, 8}, /* cost of loading MMX registers
695 in SImode and DImode */
696 {8, 8}, /* cost of storing MMX registers
697 in SImode and DImode */
698 2, /* cost of moving SSE register */
699 {8, 8, 8}, /* cost of loading SSE registers
700 in SImode, DImode and TImode */
701 {8, 8, 8}, /* cost of storing SSE registers
702 in SImode, DImode and TImode */
703 5, /* MMX or SSE register to integer */
704 64, /* size of prefetch block */
705 6, /* number of parallel prefetches */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
715 const struct processor_costs *ix86_cost = &pentium_cost;
717 /* Processor feature/optimization bitmasks. */
718 #define m_386 (1<<PROCESSOR_I386)
719 #define m_486 (1<<PROCESSOR_I486)
720 #define m_PENT (1<<PROCESSOR_PENTIUM)
721 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
722 #define m_K6 (1<<PROCESSOR_K6)
723 #define m_ATHLON (1<<PROCESSOR_ATHLON)
724 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
725 #define m_K8 (1<<PROCESSOR_K8)
726 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
727 #define m_NOCONA (1<<PROCESSOR_NOCONA)
728 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
729 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
730 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
732 /* Generic instruction choice should be common subset of supported CPUs
733 (PPro/PENT4/NOCONA/Athlon/K8). */
735 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
736 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
737 generic because it is not working well with PPro base chips. */
738 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
739 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
740 const int x86_zero_extend_with_and = m_486 | m_PENT;
741 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
742 const int x86_double_with_add = ~m_386;
743 const int x86_use_bit_test = m_386;
744 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
745 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
746 const int x86_fisttp = m_NOCONA;
747 const int x86_3dnow_a = m_ATHLON_K8;
748 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
749 /* Branch hints were put in P4 based on simulation result. But
750 after P4 was made, no performance benefit was observed with
751 branch hints. It also increases the code size. As the result,
752 icc never generates branch hints. */
753 const int x86_branch_hints = 0;
754 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
755 /* We probably ought to watch for partial register stalls on Generic32
756 compilation setting as well. However in current implementation the
757 partial register stalls are not eliminated very well - they can
758 be introduced via subregs synthesized by combine and can happen
759 in caller/callee saving sequences.
760 Because this option pays back little on PPro based chips and is in conflict
761 with partial reg. dependencies used by Athlon/P4 based chips, it is better
762 to leave it off for generic32 for now. */
763 const int x86_partial_reg_stall = m_PPRO;
764 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
765 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
766 const int x86_use_mov0 = m_K6;
767 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
768 const int x86_read_modify_write = ~m_PENT;
769 const int x86_read_modify = ~(m_PENT | m_PPRO);
770 const int x86_split_long_moves = m_PPRO;
771 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
772 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
773 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
774 const int x86_qimode_math = ~(0);
775 const int x86_promote_qi_regs = 0;
776 /* On PPro this flag is meant to avoid partial register stalls. Just like
777 the x86_partial_reg_stall this option might be considered for Generic32
778 if our scheme for avoiding partial stalls was more effective. */
779 const int x86_himode_math = ~(m_PPRO);
780 const int x86_promote_hi_regs = m_PPRO;
781 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
782 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
783 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
784 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
785 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
786 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
787 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
788 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
789 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
790 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
791 const int x86_shift1 = ~m_486;
792 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
793 /* In Generic model we have a conflict here in between PPro/Pentium4 based chips
794 that treat 128bit SSE registers as single units versus K8 based chips that
795 divide SSE registers to two 64bit halves.
796 x86_sse_partial_reg_dependency promotes all store destinations to be 128bit
797 to allow register renaming on 128bit SSE units, but usually results in one
798 extra microop on 64bit SSE units. Experimental results show that disabling
799 this option on P4 brings over 20% SPECfp regression, while enabling it on
800 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
802 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
803 /* Set for machines where the type and dependencies are resolved on SSE
804 register parts instead of whole registers, so we may maintain just
805 lower part of scalar values in proper format leaving the upper part
807 const int x86_sse_split_regs = m_ATHLON_K8;
808 const int x86_sse_typeless_stores = m_ATHLON_K8;
809 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
810 const int x86_use_ffreep = m_ATHLON_K8;
811 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
812 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
814 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
815 integer data in xmm registers. Which results in pretty abysmal code. */
816 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
818 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
819 /* Some CPU cores are not able to predict more than 4 branch instructions in
820 the 16 byte window. */
821 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
822 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
823 const int x86_use_bt = m_ATHLON_K8;
824 /* Compare and exchange was added for 80486. */
825 const int x86_cmpxchg = ~m_386;
826 /* Compare and exchange 8 bytes was added for pentium. */
827 const int x86_cmpxchg8b = ~(m_386 | m_486);
828 /* Compare and exchange 16 bytes was added for nocona. */
829 const int x86_cmpxchg16b = m_NOCONA;
830 /* Exchange and add was added for 80486. */
831 const int x86_xadd = ~m_386;
832 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
834 /* In case the average insn count for single function invocation is
835 lower than this constant, emit fast (but longer) prologue and
837 #define FAST_PROLOGUE_INSN_COUNT 20
839 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
840 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
841 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
842 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
844 /* Array of the smallest class containing reg number REGNO, indexed by
845 REGNO. Used by REGNO_REG_CLASS in i386.h. */
847 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
850 AREG, DREG, CREG, BREG,
852 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
854 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
855 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
858 /* flags, fpsr, dirflag, frame */
859 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
860 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
862 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
864 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
865 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
866 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
870 /* The "default" register map used in 32bit mode. */
872 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
874 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
875 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
876 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
877 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
878 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
879 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
880 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
883 static int const x86_64_int_parameter_registers[6] =
885 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
886 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
889 static int const x86_64_int_return_registers[4] =
891 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
894 /* The "default" register map used in 64bit mode. */
895 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
897 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
898 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
899 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
900 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
901 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
902 8,9,10,11,12,13,14,15, /* extended integer registers */
903 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
906 /* Define the register numbers to be used in Dwarf debugging information.
907 The SVR4 reference port C compiler uses the following register numbers
908 in its Dwarf output code:
909 0 for %eax (gcc regno = 0)
910 1 for %ecx (gcc regno = 2)
911 2 for %edx (gcc regno = 1)
912 3 for %ebx (gcc regno = 3)
913 4 for %esp (gcc regno = 7)
914 5 for %ebp (gcc regno = 6)
915 6 for %esi (gcc regno = 4)
916 7 for %edi (gcc regno = 5)
917 The following three DWARF register numbers are never generated by
918 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
919 believes these numbers have these meanings.
920 8 for %eip (no gcc equivalent)
921 9 for %eflags (gcc regno = 17)
922 10 for %trapno (no gcc equivalent)
923 It is not at all clear how we should number the FP stack registers
924 for the x86 architecture. If the version of SDB on x86/svr4 were
925 a bit less brain dead with respect to floating-point then we would
926 have a precedent to follow with respect to DWARF register numbers
927 for x86 FP registers, but the SDB on x86/svr4 is so completely
928 broken with respect to FP registers that it is hardly worth thinking
929 of it as something to strive for compatibility with.
930 The version of x86/svr4 SDB I have at the moment does (partially)
931 seem to believe that DWARF register number 11 is associated with
932 the x86 register %st(0), but that's about all. Higher DWARF
933 register numbers don't seem to be associated with anything in
934 particular, and even for DWARF regno 11, SDB only seems to under-
935 stand that it should say that a variable lives in %st(0) (when
936 asked via an `=' command) if we said it was in DWARF regno 11,
937 but SDB still prints garbage when asked for the value of the
938 variable in question (via a `/' command).
939 (Also note that the labels SDB prints for various FP stack regs
940 when doing an `x' command are all wrong.)
941 Note that these problems generally don't affect the native SVR4
942 C compiler because it doesn't allow the use of -O with -g and
943 because when it is *not* optimizing, it allocates a memory
944 location for each floating-point variable, and the memory
945 location is what gets described in the DWARF AT_location
946 attribute for the variable in question.
947 Regardless of the severe mental illness of the x86/svr4 SDB, we
948 do something sensible here and we use the following DWARF
949 register numbers. Note that these are all stack-top-relative
951 11 for %st(0) (gcc regno = 8)
952 12 for %st(1) (gcc regno = 9)
953 13 for %st(2) (gcc regno = 10)
954 14 for %st(3) (gcc regno = 11)
955 15 for %st(4) (gcc regno = 12)
956 16 for %st(5) (gcc regno = 13)
957 17 for %st(6) (gcc regno = 14)
958 18 for %st(7) (gcc regno = 15)
960 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
962 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
963 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
964 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
965 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
966 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
967 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
968 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
971 /* Test and compare insns in i386.md store the information needed to
972 generate branch and scc insns here. */
974 rtx ix86_compare_op0 = NULL_RTX;
975 rtx ix86_compare_op1 = NULL_RTX;
976 rtx ix86_compare_emitted = NULL_RTX;
978 /* Size of the register save area. */
979 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
981 /* Define the structure for the machine field in struct function. */
983 struct stack_local_entry GTY(())
988 struct stack_local_entry *next;
991 /* Structure describing stack frame layout.
992 Stack grows downward:
998 saved frame pointer if frame_pointer_needed
999 <- HARD_FRAME_POINTER
1004 [va_arg registers] (
1005 > to_allocate <- FRAME_POINTER
1015 HOST_WIDE_INT frame;
1017 int outgoing_arguments_size;
1020 HOST_WIDE_INT to_allocate;
1021 /* The offsets relative to ARG_POINTER. */
1022 HOST_WIDE_INT frame_pointer_offset;
1023 HOST_WIDE_INT hard_frame_pointer_offset;
1024 HOST_WIDE_INT stack_pointer_offset;
1026 /* When save_regs_using_mov is set, emit prologue using
1027 move instead of push instructions. */
1028 bool save_regs_using_mov;
1031 /* Code model option. */
1032 enum cmodel ix86_cmodel;
1034 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1036 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1038 /* Which unit we are generating floating point math for. */
1039 enum fpmath_unit ix86_fpmath;
1041 /* Which cpu are we scheduling for. */
1042 enum processor_type ix86_tune;
1043 /* Which instruction set architecture to use. */
1044 enum processor_type ix86_arch;
1046 /* true if sse prefetch instruction is not NOOP. */
1047 int x86_prefetch_sse;
1049 /* ix86_regparm_string as a number */
1050 static int ix86_regparm;
1052 /* Preferred alignment for stack boundary in bits. */
1053 unsigned int ix86_preferred_stack_boundary;
1055 /* Values 1-5: see jump.c */
1056 int ix86_branch_cost;
1058 /* Variables which are this size or smaller are put in the data/bss
1059 or ldata/lbss sections. */
1061 int ix86_section_threshold = 65536;
1063 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1064 char internal_label_prefix[16];
1065 int internal_label_prefix_len;
1067 /* Table for BUILT_IN_NORMAL to BUILT_IN_MD mapping. */
1068 static GTY(()) tree ix86_builtin_function_variants[(int) END_BUILTINS];
1070 static bool ix86_handle_option (size_t, const char *, int);
1071 static void output_pic_addr_const (FILE *, rtx, int);
1072 static void put_condition_code (enum rtx_code, enum machine_mode,
1074 static const char *get_some_local_dynamic_name (void);
1075 static int get_some_local_dynamic_name_1 (rtx *, void *);
1076 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1077 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1079 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1080 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1082 static rtx get_thread_pointer (int);
1083 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1084 static void get_pc_thunk_name (char [32], unsigned int);
1085 static rtx gen_push (rtx);
1086 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1087 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1088 static struct machine_function * ix86_init_machine_status (void);
1089 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1090 static int ix86_nsaved_regs (void);
1091 static void ix86_emit_save_regs (void);
1092 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1093 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1094 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1095 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1096 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1097 static rtx ix86_expand_aligntest (rtx, int);
1098 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1099 static int ix86_issue_rate (void);
1100 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1101 static int ia32_multipass_dfa_lookahead (void);
1102 static void ix86_init_mmx_sse_builtins (void);
1103 static void ix86_init_sse_abi_builtins (void);
1104 static rtx x86_this_parameter (tree);
1105 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1106 HOST_WIDE_INT, tree);
1107 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1108 static void x86_file_start (void);
1109 static void ix86_reorg (void);
1110 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1111 static tree ix86_build_builtin_va_list (void);
1112 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1114 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1115 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1116 static bool ix86_vector_mode_supported_p (enum machine_mode);
1118 static int ix86_address_cost (rtx);
1119 static bool ix86_cannot_force_const_mem (rtx);
1120 static rtx ix86_delegitimize_address (rtx);
1122 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1124 struct builtin_description;
1125 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1127 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1129 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1130 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1131 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1132 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1133 static rtx safe_vector_operand (rtx, enum machine_mode);
1134 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1135 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1136 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1137 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1138 static int ix86_fp_comparison_cost (enum rtx_code code);
1139 static unsigned int ix86_select_alt_pic_regnum (void);
1140 static int ix86_save_reg (unsigned int, int);
1141 static void ix86_compute_frame_layout (struct ix86_frame *);
1142 static int ix86_comp_type_attributes (tree, tree);
1143 static int ix86_function_regparm (tree, tree);
1144 const struct attribute_spec ix86_attribute_table[];
1145 static bool ix86_function_ok_for_sibcall (tree, tree);
1146 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1147 static int ix86_value_regno (enum machine_mode, tree, tree);
1148 static bool contains_128bit_aligned_vector_p (tree);
1149 static rtx ix86_struct_value_rtx (tree, int);
1150 static bool ix86_ms_bitfield_layout_p (tree);
1151 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1152 static int extended_reg_mentioned_1 (rtx *, void *);
1153 static bool ix86_rtx_costs (rtx, int, int, int *);
1154 static int min_insn_size (rtx);
1155 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1156 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1157 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1159 static void ix86_init_builtins (void);
1160 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1161 static rtx ix86_expand_library_builtin (tree, rtx, rtx, enum machine_mode, int);
1162 static const char *ix86_mangle_fundamental_type (tree);
1163 static tree ix86_stack_protect_fail (void);
1164 static rtx ix86_internal_arg_pointer (void);
1165 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1167 /* This function is only used on Solaris. */
1168 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1171 /* Register class used for passing given 64bit part of the argument.
1172 These represent classes as documented by the PS ABI, with the exception
1173 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1174 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1176 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1177 whenever possible (upper half does contain padding).
1179 enum x86_64_reg_class
1182 X86_64_INTEGER_CLASS,
1183 X86_64_INTEGERSI_CLASS,
1190 X86_64_COMPLEX_X87_CLASS,
1193 static const char * const x86_64_reg_class_name[] = {
1194 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1195 "sseup", "x87", "x87up", "cplx87", "no"
1198 #define MAX_CLASSES 4
1200 /* Table of constants used by fldpi, fldln2, etc.... */
1201 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1202 static bool ext_80387_constants_init = 0;
1203 static void init_ext_80387_constants (void);
1204 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1205 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1206 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1207 static section *x86_64_elf_select_section (tree decl, int reloc,
1208 unsigned HOST_WIDE_INT align)
1211 /* Initialize the GCC target structure. */
1212 #undef TARGET_ATTRIBUTE_TABLE
1213 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1214 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1215 # undef TARGET_MERGE_DECL_ATTRIBUTES
1216 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1219 #undef TARGET_COMP_TYPE_ATTRIBUTES
1220 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1222 #undef TARGET_INIT_BUILTINS
1223 #define TARGET_INIT_BUILTINS ix86_init_builtins
1224 #undef TARGET_EXPAND_BUILTIN
1225 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1226 #undef TARGET_EXPAND_LIBRARY_BUILTIN
1227 #define TARGET_EXPAND_LIBRARY_BUILTIN ix86_expand_library_builtin
1229 #undef TARGET_ASM_FUNCTION_EPILOGUE
1230 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1232 #undef TARGET_ENCODE_SECTION_INFO
1233 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1234 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1236 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1239 #undef TARGET_ASM_OPEN_PAREN
1240 #define TARGET_ASM_OPEN_PAREN ""
1241 #undef TARGET_ASM_CLOSE_PAREN
1242 #define TARGET_ASM_CLOSE_PAREN ""
1244 #undef TARGET_ASM_ALIGNED_HI_OP
1245 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1246 #undef TARGET_ASM_ALIGNED_SI_OP
1247 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1249 #undef TARGET_ASM_ALIGNED_DI_OP
1250 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1253 #undef TARGET_ASM_UNALIGNED_HI_OP
1254 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1255 #undef TARGET_ASM_UNALIGNED_SI_OP
1256 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1257 #undef TARGET_ASM_UNALIGNED_DI_OP
1258 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1260 #undef TARGET_SCHED_ADJUST_COST
1261 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1262 #undef TARGET_SCHED_ISSUE_RATE
1263 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1264 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1265 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1266 ia32_multipass_dfa_lookahead
1268 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1269 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1272 #undef TARGET_HAVE_TLS
1273 #define TARGET_HAVE_TLS true
1275 #undef TARGET_CANNOT_FORCE_CONST_MEM
1276 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1277 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1278 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1280 #undef TARGET_DELEGITIMIZE_ADDRESS
1281 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1283 #undef TARGET_MS_BITFIELD_LAYOUT_P
1284 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1287 #undef TARGET_BINDS_LOCAL_P
1288 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1291 #undef TARGET_ASM_OUTPUT_MI_THUNK
1292 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1293 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1294 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1296 #undef TARGET_ASM_FILE_START
1297 #define TARGET_ASM_FILE_START x86_file_start
1299 #undef TARGET_DEFAULT_TARGET_FLAGS
1300 #define TARGET_DEFAULT_TARGET_FLAGS \
1302 | TARGET_64BIT_DEFAULT \
1303 | TARGET_SUBTARGET_DEFAULT \
1304 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1306 #undef TARGET_HANDLE_OPTION
1307 #define TARGET_HANDLE_OPTION ix86_handle_option
1309 #undef TARGET_RTX_COSTS
1310 #define TARGET_RTX_COSTS ix86_rtx_costs
1311 #undef TARGET_ADDRESS_COST
1312 #define TARGET_ADDRESS_COST ix86_address_cost
1314 #undef TARGET_FIXED_CONDITION_CODE_REGS
1315 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1316 #undef TARGET_CC_MODES_COMPATIBLE
1317 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1319 #undef TARGET_MACHINE_DEPENDENT_REORG
1320 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1322 #undef TARGET_BUILD_BUILTIN_VA_LIST
1323 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1325 #undef TARGET_MD_ASM_CLOBBERS
1326 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1328 #undef TARGET_PROMOTE_PROTOTYPES
1329 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1330 #undef TARGET_STRUCT_VALUE_RTX
1331 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1332 #undef TARGET_SETUP_INCOMING_VARARGS
1333 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1334 #undef TARGET_MUST_PASS_IN_STACK
1335 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1336 #undef TARGET_PASS_BY_REFERENCE
1337 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1338 #undef TARGET_INTERNAL_ARG_POINTER
1339 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1340 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1341 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1343 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1344 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1346 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1347 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1349 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1350 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1353 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1354 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1357 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1358 #undef TARGET_INSERT_ATTRIBUTES
1359 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1362 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1363 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1365 #undef TARGET_STACK_PROTECT_FAIL
1366 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1368 #undef TARGET_FUNCTION_VALUE
1369 #define TARGET_FUNCTION_VALUE ix86_function_value
1371 struct gcc_target targetm = TARGET_INITIALIZER;
1374 /* The svr4 ABI for the i386 says that records and unions are returned
1376 #ifndef DEFAULT_PCC_STRUCT_RETURN
1377 #define DEFAULT_PCC_STRUCT_RETURN 1
1380 /* Implement TARGET_HANDLE_OPTION. */
1383 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1390 target_flags &= ~MASK_3DNOW_A;
1391 target_flags_explicit |= MASK_3DNOW_A;
1398 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1399 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1406 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1407 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1414 target_flags &= ~MASK_SSE3;
1415 target_flags_explicit |= MASK_SSE3;
1424 /* Sometimes certain combinations of command options do not make
1425 sense on a particular target machine. You can define a macro
1426 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1427 defined, is executed once just after all the command options have
1430 Don't use this macro to turn on various extra optimizations for
1431 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1434 override_options (void)
1437 int ix86_tune_defaulted = 0;
1439 /* Comes from final.c -- no real reason to change it. */
1440 #define MAX_CODE_ALIGN 16
1444 const struct processor_costs *cost; /* Processor costs */
1445 const int target_enable; /* Target flags to enable. */
1446 const int target_disable; /* Target flags to disable. */
1447 const int align_loop; /* Default alignments. */
1448 const int align_loop_max_skip;
1449 const int align_jump;
1450 const int align_jump_max_skip;
1451 const int align_func;
1453 const processor_target_table[PROCESSOR_max] =
1455 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1456 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1457 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1458 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1459 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1460 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1461 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1462 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1463 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1464 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1465 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1468 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1471 const char *const name; /* processor name or nickname. */
1472 const enum processor_type processor;
1473 const enum pta_flags
1479 PTA_PREFETCH_SSE = 16,
1485 const processor_alias_table[] =
1487 {"i386", PROCESSOR_I386, 0},
1488 {"i486", PROCESSOR_I486, 0},
1489 {"i586", PROCESSOR_PENTIUM, 0},
1490 {"pentium", PROCESSOR_PENTIUM, 0},
1491 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1492 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1493 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1494 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1495 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1496 {"i686", PROCESSOR_PENTIUMPRO, 0},
1497 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1498 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1499 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1500 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1501 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1502 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1503 | PTA_MMX | PTA_PREFETCH_SSE},
1504 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1505 | PTA_MMX | PTA_PREFETCH_SSE},
1506 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1507 | PTA_MMX | PTA_PREFETCH_SSE},
1508 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1509 | PTA_MMX | PTA_PREFETCH_SSE},
1510 {"k6", PROCESSOR_K6, PTA_MMX},
1511 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1512 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1513 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1515 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1516 | PTA_3DNOW | PTA_3DNOW_A},
1517 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1518 | PTA_3DNOW_A | PTA_SSE},
1519 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1520 | PTA_3DNOW_A | PTA_SSE},
1521 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1522 | PTA_3DNOW_A | PTA_SSE},
1523 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1524 | PTA_SSE | PTA_SSE2 },
1525 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1526 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1527 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1528 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1529 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1530 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1531 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1532 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1533 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1534 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1537 int const pta_size = ARRAY_SIZE (processor_alias_table);
1539 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1540 SUBTARGET_OVERRIDE_OPTIONS;
1543 /* Set the default values for switches whose default depends on TARGET_64BIT
1544 in case they weren't overwritten by command line options. */
1547 if (flag_omit_frame_pointer == 2)
1548 flag_omit_frame_pointer = 1;
1549 if (flag_asynchronous_unwind_tables == 2)
1550 flag_asynchronous_unwind_tables = 1;
1551 if (flag_pcc_struct_return == 2)
1552 flag_pcc_struct_return = 0;
1556 if (flag_omit_frame_pointer == 2)
1557 flag_omit_frame_pointer = 0;
1558 if (flag_asynchronous_unwind_tables == 2)
1559 flag_asynchronous_unwind_tables = 0;
1560 if (flag_pcc_struct_return == 2)
1561 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1564 /* Need to check -mtune=generic first. */
1565 if (ix86_tune_string)
1567 if (!strcmp (ix86_tune_string, "generic")
1568 || !strcmp (ix86_tune_string, "i686"))
1571 ix86_tune_string = "generic64";
1573 ix86_tune_string = "generic32";
1575 else if (!strncmp (ix86_tune_string, "generic", 7))
1576 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1580 if (ix86_arch_string)
1581 ix86_tune_string = ix86_arch_string;
1582 if (!ix86_tune_string)
1584 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1585 ix86_tune_defaulted = 1;
1588 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1589 need to use a sensible tune option. */
1590 if (!strcmp (ix86_tune_string, "generic")
1591 || !strcmp (ix86_tune_string, "x86-64")
1592 || !strcmp (ix86_tune_string, "i686"))
1595 ix86_tune_string = "generic64";
1597 ix86_tune_string = "generic32";
1600 if (!strcmp (ix86_tune_string, "x86-64"))
1601 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1602 "-mtune=generic instead as appropriate.");
1604 if (!ix86_arch_string)
1605 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1606 if (!strcmp (ix86_arch_string, "generic"))
1607 error ("generic CPU can be used only for -mtune= switch");
1608 if (!strncmp (ix86_arch_string, "generic", 7))
1609 error ("bad value (%s) for -march= switch", ix86_arch_string);
1611 if (ix86_cmodel_string != 0)
1613 if (!strcmp (ix86_cmodel_string, "small"))
1614 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1615 else if (!strcmp (ix86_cmodel_string, "medium"))
1616 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1618 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1619 else if (!strcmp (ix86_cmodel_string, "32"))
1620 ix86_cmodel = CM_32;
1621 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1622 ix86_cmodel = CM_KERNEL;
1623 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1624 ix86_cmodel = CM_LARGE;
1626 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1630 ix86_cmodel = CM_32;
1632 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1634 if (ix86_asm_string != 0)
1637 && !strcmp (ix86_asm_string, "intel"))
1638 ix86_asm_dialect = ASM_INTEL;
1639 else if (!strcmp (ix86_asm_string, "att"))
1640 ix86_asm_dialect = ASM_ATT;
1642 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1644 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1645 error ("code model %qs not supported in the %s bit mode",
1646 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1647 if (ix86_cmodel == CM_LARGE)
1648 sorry ("code model %<large%> not supported yet");
1649 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1650 sorry ("%i-bit mode not compiled in",
1651 (target_flags & MASK_64BIT) ? 64 : 32);
1653 for (i = 0; i < pta_size; i++)
1654 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1656 ix86_arch = processor_alias_table[i].processor;
1657 /* Default cpu tuning to the architecture. */
1658 ix86_tune = ix86_arch;
1659 if (processor_alias_table[i].flags & PTA_MMX
1660 && !(target_flags_explicit & MASK_MMX))
1661 target_flags |= MASK_MMX;
1662 if (processor_alias_table[i].flags & PTA_3DNOW
1663 && !(target_flags_explicit & MASK_3DNOW))
1664 target_flags |= MASK_3DNOW;
1665 if (processor_alias_table[i].flags & PTA_3DNOW_A
1666 && !(target_flags_explicit & MASK_3DNOW_A))
1667 target_flags |= MASK_3DNOW_A;
1668 if (processor_alias_table[i].flags & PTA_SSE
1669 && !(target_flags_explicit & MASK_SSE))
1670 target_flags |= MASK_SSE;
1671 if (processor_alias_table[i].flags & PTA_SSE2
1672 && !(target_flags_explicit & MASK_SSE2))
1673 target_flags |= MASK_SSE2;
1674 if (processor_alias_table[i].flags & PTA_SSE3
1675 && !(target_flags_explicit & MASK_SSE3))
1676 target_flags |= MASK_SSE3;
1677 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1678 x86_prefetch_sse = true;
1679 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1680 error ("CPU you selected does not support x86-64 "
1686 error ("bad value (%s) for -march= switch", ix86_arch_string);
1688 for (i = 0; i < pta_size; i++)
1689 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1691 ix86_tune = processor_alias_table[i].processor;
1692 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1694 if (ix86_tune_defaulted)
1696 ix86_tune_string = "x86-64";
1697 for (i = 0; i < pta_size; i++)
1698 if (! strcmp (ix86_tune_string,
1699 processor_alias_table[i].name))
1701 ix86_tune = processor_alias_table[i].processor;
1704 error ("CPU you selected does not support x86-64 "
1707 /* Intel CPUs have always interpreted SSE prefetch instructions as
1708 NOPs; so, we can enable SSE prefetch instructions even when
1709 -mtune (rather than -march) points us to a processor that has them.
1710 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1711 higher processors. */
1712 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1713 x86_prefetch_sse = true;
1717 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1720 ix86_cost = &size_cost;
1722 ix86_cost = processor_target_table[ix86_tune].cost;
1723 target_flags |= processor_target_table[ix86_tune].target_enable;
1724 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1726 /* Arrange to set up i386_stack_locals for all functions. */
1727 init_machine_status = ix86_init_machine_status;
1729 /* Validate -mregparm= value. */
1730 if (ix86_regparm_string)
1732 i = atoi (ix86_regparm_string);
1733 if (i < 0 || i > REGPARM_MAX)
1734 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1740 ix86_regparm = REGPARM_MAX;
1742 /* If the user has provided any of the -malign-* options,
1743 warn and use that value only if -falign-* is not set.
1744 Remove this code in GCC 3.2 or later. */
1745 if (ix86_align_loops_string)
1747 warning (0, "-malign-loops is obsolete, use -falign-loops");
1748 if (align_loops == 0)
1750 i = atoi (ix86_align_loops_string);
1751 if (i < 0 || i > MAX_CODE_ALIGN)
1752 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1754 align_loops = 1 << i;
1758 if (ix86_align_jumps_string)
1760 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1761 if (align_jumps == 0)
1763 i = atoi (ix86_align_jumps_string);
1764 if (i < 0 || i > MAX_CODE_ALIGN)
1765 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1767 align_jumps = 1 << i;
1771 if (ix86_align_funcs_string)
1773 warning (0, "-malign-functions is obsolete, use -falign-functions");
1774 if (align_functions == 0)
1776 i = atoi (ix86_align_funcs_string);
1777 if (i < 0 || i > MAX_CODE_ALIGN)
1778 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1780 align_functions = 1 << i;
1784 /* Default align_* from the processor table. */
1785 if (align_loops == 0)
1787 align_loops = processor_target_table[ix86_tune].align_loop;
1788 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1790 if (align_jumps == 0)
1792 align_jumps = processor_target_table[ix86_tune].align_jump;
1793 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1795 if (align_functions == 0)
1797 align_functions = processor_target_table[ix86_tune].align_func;
1800 /* Validate -mpreferred-stack-boundary= value, or provide default.
1801 The default of 128 bits is for Pentium III's SSE __m128, but we
1802 don't want additional code to keep the stack aligned when
1803 optimizing for code size. */
1804 ix86_preferred_stack_boundary = ((TARGET_64BIT || TARGET_MACHO || !optimize_size)
1806 if (ix86_preferred_stack_boundary_string)
1808 i = atoi (ix86_preferred_stack_boundary_string);
1809 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1810 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1811 TARGET_64BIT ? 4 : 2);
1813 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1816 /* Validate -mbranch-cost= value, or provide default. */
1817 ix86_branch_cost = ix86_cost->branch_cost;
1818 if (ix86_branch_cost_string)
1820 i = atoi (ix86_branch_cost_string);
1822 error ("-mbranch-cost=%d is not between 0 and 5", i);
1824 ix86_branch_cost = i;
1826 if (ix86_section_threshold_string)
1828 i = atoi (ix86_section_threshold_string);
1830 error ("-mlarge-data-threshold=%d is negative", i);
1832 ix86_section_threshold = i;
1835 if (ix86_tls_dialect_string)
1837 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1838 ix86_tls_dialect = TLS_DIALECT_GNU;
1839 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1840 ix86_tls_dialect = TLS_DIALECT_GNU2;
1841 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1842 ix86_tls_dialect = TLS_DIALECT_SUN;
1844 error ("bad value (%s) for -mtls-dialect= switch",
1845 ix86_tls_dialect_string);
1848 /* Keep nonleaf frame pointers. */
1849 if (flag_omit_frame_pointer)
1850 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1851 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1852 flag_omit_frame_pointer = 1;
1854 /* If we're doing fast math, we don't care about comparison order
1855 wrt NaNs. This lets us use a shorter comparison sequence. */
1856 if (flag_unsafe_math_optimizations)
1857 target_flags &= ~MASK_IEEE_FP;
1859 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1860 since the insns won't need emulation. */
1861 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1862 target_flags &= ~MASK_NO_FANCY_MATH_387;
1864 /* Likewise, if the target doesn't have a 387, or we've specified
1865 software floating point, don't use 387 inline intrinsics. */
1867 target_flags |= MASK_NO_FANCY_MATH_387;
1869 /* Turn on SSE2 builtins for -msse3. */
1871 target_flags |= MASK_SSE2;
1873 /* Turn on SSE builtins for -msse2. */
1875 target_flags |= MASK_SSE;
1877 /* Turn on MMX builtins for -msse. */
1880 target_flags |= MASK_MMX & ~target_flags_explicit;
1881 x86_prefetch_sse = true;
1884 /* Turn on MMX builtins for 3Dnow. */
1886 target_flags |= MASK_MMX;
1890 if (TARGET_ALIGN_DOUBLE)
1891 error ("-malign-double makes no sense in the 64bit mode");
1893 error ("-mrtd calling convention not supported in the 64bit mode");
1895 /* Enable by default the SSE and MMX builtins. Do allow the user to
1896 explicitly disable any of these. In particular, disabling SSE and
1897 MMX for kernel code is extremely useful. */
1899 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1900 & ~target_flags_explicit);
1904 /* i386 ABI does not specify red zone. It still makes sense to use it
1905 when programmer takes care to stack from being destroyed. */
1906 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1907 target_flags |= MASK_NO_RED_ZONE;
1910 /* Accept -msseregparm only if at least SSE support is enabled. */
1911 if (TARGET_SSEREGPARM
1913 error ("-msseregparm used without SSE enabled");
1915 /* Accept -msselibm only if at least SSE support is enabled. */
1918 error ("-msselibm used without SSE2 enabled");
1920 /* Ignore -msselibm on 64bit targets. */
1923 error ("-msselibm used on a 64bit target");
1925 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1927 if (ix86_fpmath_string != 0)
1929 if (! strcmp (ix86_fpmath_string, "387"))
1930 ix86_fpmath = FPMATH_387;
1931 else if (! strcmp (ix86_fpmath_string, "sse"))
1935 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1936 ix86_fpmath = FPMATH_387;
1939 ix86_fpmath = FPMATH_SSE;
1941 else if (! strcmp (ix86_fpmath_string, "387,sse")
1942 || ! strcmp (ix86_fpmath_string, "sse,387"))
1946 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1947 ix86_fpmath = FPMATH_387;
1949 else if (!TARGET_80387)
1951 warning (0, "387 instruction set disabled, using SSE arithmetics");
1952 ix86_fpmath = FPMATH_SSE;
1955 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1958 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1961 /* If the i387 is disabled, then do not return values in it. */
1963 target_flags &= ~MASK_FLOAT_RETURNS;
1965 if ((x86_accumulate_outgoing_args & TUNEMASK)
1966 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1968 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1970 /* ??? Unwind info is not correct around the CFG unless either a frame
1971 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1972 unwind info generation to be aware of the CFG and propagating states
1974 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1975 || flag_exceptions || flag_non_call_exceptions)
1976 && flag_omit_frame_pointer
1977 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1979 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1980 warning (0, "unwind tables currently require either a frame pointer "
1981 "or -maccumulate-outgoing-args for correctness");
1982 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1985 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1988 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1989 p = strchr (internal_label_prefix, 'X');
1990 internal_label_prefix_len = p - internal_label_prefix;
1994 /* When scheduling description is not available, disable scheduler pass
1995 so it won't slow down the compilation and make x87 code slower. */
1996 if (!TARGET_SCHEDULE)
1997 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2000 /* switch to the appropriate section for output of DECL.
2001 DECL is either a `VAR_DECL' node or a constant of some sort.
2002 RELOC indicates whether forming the initial value of DECL requires
2003 link-time relocations. */
/* NOTE(review): this listing is elided -- the return-type line, several
   switch cases, break statements and closing braces are not visible here,
   so the comments below describe only the code that is shown.  */
2006 x86_64_elf_select_section (tree decl, int reloc,
2007 unsigned HOST_WIDE_INT align)
/* For the x86-64 medium code model, DECLs classified as "large data" are
   placed in the ".l"-prefixed large-data sections; everything else falls
   through to the generic ELF section selector at the bottom.  */
2009 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2010 && ix86_in_large_data_p (decl))
2012 const char *sname = NULL;
2013 unsigned int flags = SECTION_WRITE;
/* Map the generic section category onto a large-model section name.  */
2014 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2019 case SECCAT_DATA_REL:
2020 sname = ".ldata.rel";
2022 case SECCAT_DATA_REL_LOCAL:
2023 sname = ".ldata.rel.local";
2025 case SECCAT_DATA_REL_RO:
2026 sname = ".ldata.rel.ro";
2028 case SECCAT_DATA_REL_RO_LOCAL:
2029 sname = ".ldata.rel.ro.local";
2033 flags |= SECTION_BSS;
2036 case SECCAT_RODATA_MERGE_STR:
2037 case SECCAT_RODATA_MERGE_STR_INIT:
2038 case SECCAT_RODATA_MERGE_CONST:
2042 case SECCAT_SRODATA:
2049 /* We don't split these for medium model. Place them into
2050 default sections and hope for best. */
2055 /* We might get called with string constants, but get_named_section
2056 doesn't like them as they are not DECLs. Also, we need to set
2057 flags in that case. */
/* Anonymous constants (no DECL) must use get_section with explicit
   flags; real DECLs go through get_named_section.  */
2059 return get_section (sname, flags, NULL);
2060 return get_named_section (decl, sname, reloc);
2063 return default_elf_select_section (decl, reloc, align);
2066 /* Build up a unique section name, expressed as a
2067 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2068 RELOC indicates whether the initial value of EXP requires
2069 link-time relocations. */
/* NOTE(review): elided listing -- some cases, breaks and the declarations
   of plen/nlen/name/string are not visible here.  */
2072 x86_64_elf_unique_section (tree decl, int reloc)
/* Same medium-model large-data test as x86_64_elf_select_section: large
   data gets a ".l"-prefixed (or .gnu.linkonce.l*) unique section name.  */
2074 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2075 && ix86_in_large_data_p (decl))
2077 const char *prefix = NULL;
2078 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2079 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2081 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2084 case SECCAT_DATA_REL:
2085 case SECCAT_DATA_REL_LOCAL:
2086 case SECCAT_DATA_REL_RO:
2087 case SECCAT_DATA_REL_RO_LOCAL:
2088 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2091 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2094 case SECCAT_RODATA_MERGE_STR:
2095 case SECCAT_RODATA_MERGE_STR_INIT:
2096 case SECCAT_RODATA_MERGE_CONST:
2097 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2099 case SECCAT_SRODATA:
2106 /* We don't split these for medium model. Place them into
2107 default sections and hope for best. */
/* Concatenate prefix + stripped assembler name into a stack buffer and
   record it as the DECL's section name (build_string copies the bytes).  */
2115 plen = strlen (prefix);
2117 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2118 name = targetm.strip_name_encoding (name);
2119 nlen = strlen (name);
2121 string = alloca (nlen + plen + 1);
2122 memcpy (string, prefix, plen);
2123 memcpy (string + plen, name, nlen + 1);
2125 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
/* Not large data: defer to the generic unique-section logic.  */
2129 default_unique_section (decl, reloc);
2132 #ifdef COMMON_ASM_OP
2133 /* This says how to output assembler code to declare an
2134 uninitialized external linkage data object.
2136 For medium model x86-64 we need to use .largecomm opcode for
2139 x86_elf_aligned_common (FILE *file,
2140 const char *name, unsigned HOST_WIDE_INT size,
/* Emit ".largecomm" for medium-model objects above the -mlarge-data-threshold
   size; otherwise emit the normal COMMON_ASM_OP (".comm").  */
2143 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2144 && size > (unsigned int)ix86_section_threshold)
2145 fprintf (file, ".largecomm\t");
2147 fprintf (file, "%s", COMMON_ASM_OP);
2148 assemble_name (file, name);
/* Alignment is given to the assembler in bytes, not bits.  */
2149 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2150 size, align / BITS_PER_UNIT);
2153 /* Utility function for targets to use in implementing
2154 ASM_OUTPUT_ALIGNED_BSS. */
2157 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2158 const char *name, unsigned HOST_WIDE_INT size,
/* Large medium-model BSS objects go to ".lbss"; everything else to the
   regular .bss section.  */
2161 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2162 && size > (unsigned int)ix86_section_threshold)
2163 switch_to_section (get_named_section (decl, ".lbss", 0));
2165 switch_to_section (bss_section);
2166 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2167 #ifdef ASM_DECLARE_OBJECT_NAME
2168 last_assemble_variable_decl = decl;
2169 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2171 /* Standard thing is just output label for the object. */
2172 ASM_OUTPUT_LABEL (file, name);
2173 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so a zero-sized object still gets a distinct
   address.  */
2174 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set default optimization-dependent options for this target; runs before
   override_options (see the "mark with 2" comment below).  */
2179 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2181 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2182 make the problem with not enough registers even worse. */
2183 #ifdef INSN_SCHEDULING
2185 flag_schedule_insns = 0;
2189 /* The Darwin libraries never set errno, so we might as well
2190 avoid calling them when that's the only reason we would. */
2191 flag_errno_math = 0;
2193 /* The default values of these switches depend on the TARGET_64BIT
2194 that is not known at this moment. Mark these values with 2 and
2195 let user the to override these. In case there is no command line option
2196 specifying them, we will set the defaults in override_options. */
/* The sentinel value 2 means "not set by the user"; override_options
   replaces it with the real per-target default.  */
2198 flag_omit_frame_pointer = 2;
2199 flag_pcc_struct_return = 2;
2200 flag_asynchronous_unwind_tables = 2;
2201 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2202 SUBTARGET_OPTIMIZATION_OPTIONS;
2206 /* Table of valid machine attributes. */
/* Terminated by the all-NULL sentinel entry; the closing brace of the
   initializer is not visible in this elided listing.  */
2207 const struct attribute_spec ix86_attribute_table[] =
2209 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2210 /* Stdcall attribute says callee is responsible for popping arguments
2211 if they are not variable. */
2212 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2213 /* Fastcall attribute says callee is responsible for popping arguments
2214 if they are not variable. */
2215 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2216 /* Cdecl attribute says the callee is a normal C declaration */
2217 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2218 /* Regparm attribute specifies how many integer arguments are to be
2219 passed in registers. */
2220 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2221 /* Sseregparm attribute says we are using x86_64 calling conventions
2222 for FP arguments. */
2223 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2224 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2225 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2226 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2227 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2229 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2230 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2231 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2232 SUBTARGET_ATTRIBUTE_TABLE,
2234 { NULL, 0, 0, false, false, false, NULL }
2237 /* Decide whether we can make a sibling call to a function. DECL is the
2238 declaration of the function being targeted by the call and EXP is the
2239 CALL_EXPR representing the call. */
/* NOTE(review): elided listing -- the return type, local declarations and
   several "return false" statements are not visible here.  */
2242 ix86_function_ok_for_sibcall (tree decl, tree exp)
2247 /* If we are generating position-independent code, we cannot sibcall
2248 optimize any indirect call, or a direct call to a global function,
2249 as the PLT requires %ebx be live. */
2250 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Fetch the called function's type; strip one level of pointer if the
   callee is given as a function pointer.  */
2257 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2258 if (POINTER_TYPE_P (func))
2259 func = TREE_TYPE (func);
2262 /* Check that the return value locations are the same. Like
2263 if we are returning floats on the 80387 register stack, we cannot
2264 make a sibcall from a function that doesn't return a float to a
2265 function that does or, conversely, from a function that does return
2266 a float to a function that doesn't; the necessary stack adjustment
2267 would not be executed. This is also the place we notice
2268 differences in the return value ABI. Note that it is ok for one
2269 of the functions to have void return type as long as the return
2270 value of the other is passed in a register. */
2271 a = ix86_function_value (TREE_TYPE (exp), func, false);
2272 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2274 if (STACK_REG_P (a) || STACK_REG_P (b))
2276 if (!rtx_equal_p (a, b))
2279 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2281 else if (!rtx_equal_p (a, b))
2284 /* If this call is indirect, we'll need to be able to use a call-clobbered
2285 register for the address of the target function. Make sure that all
2286 such registers are not used for passing parameters. */
2287 if (!decl && !TARGET_64BIT)
2291 /* We're looking at the CALL_EXPR, we need the type of the function. */
2292 type = TREE_OPERAND (exp, 0); /* pointer expression */
2293 type = TREE_TYPE (type); /* pointer type */
2294 type = TREE_TYPE (type); /* function type */
/* regparm >= 3 would leave no call-clobbered register free to hold the
   indirect target address.  */
2296 if (ix86_function_regparm (type, NULL) >= 3)
2298 /* ??? Need to count the actual number of registers to be used,
2299 not the possible number of registers. Fix later. */
2304 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2305 /* Dllimport'd functions are also called indirectly. */
2306 if (decl && DECL_DLLIMPORT_P (decl)
2307 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2311 /* If we forced aligned the stack, then sibcalling would unalign the
2312 stack, which may break the called function. */
2313 if (cfun->machine->force_align_arg_pointer)
2316 /* Otherwise okay. That also includes certain types of indirect calls. */
2320 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2321 calling convention attributes;
2322 arguments as in struct attribute_spec.handler. */
/* NOTE(review): elided listing -- the return type, the TARGET_64BIT early
   exit and several returns/braces are not visible here.  */
2325 ix86_handle_cconv_attribute (tree *node, tree name,
2327 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function (or method/typedef/field)
   declarations; warn and drop the attribute otherwise.  */
2330 if (TREE_CODE (*node) != FUNCTION_TYPE
2331 && TREE_CODE (*node) != METHOD_TYPE
2332 && TREE_CODE (*node) != FIELD_DECL
2333 && TREE_CODE (*node) != TYPE_DECL)
2335 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2336 IDENTIFIER_POINTER (name));
2337 *no_add_attrs = true;
2341 /* Can combine regparm with all attributes but fastcall. */
2342 if (is_attribute_p ("regparm", name))
2346 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2348 error ("fastcall and regparm attributes are not compatible");
/* Validate regparm's single argument: must be an integer constant in
   the range 0..REGPARM_MAX.  */
2351 cst = TREE_VALUE (args);
2352 if (TREE_CODE (cst) != INTEGER_CST)
2354 warning (OPT_Wattributes,
2355 "%qs attribute requires an integer constant argument",
2356 IDENTIFIER_POINTER (name));
2357 *no_add_attrs = true;
2359 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2361 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2362 IDENTIFIER_POINTER (name), REGPARM_MAX);
2363 *no_add_attrs = true;
2371 warning (OPT_Wattributes, "%qs attribute ignored",
2372 IDENTIFIER_POINTER (name));
2373 *no_add_attrs = true;
2377 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2378 if (is_attribute_p ("fastcall", name))
2380 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2382 error ("fastcall and cdecl attributes are not compatible");
2384 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2386 error ("fastcall and stdcall attributes are not compatible");
2388 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2390 error ("fastcall and regparm attributes are not compatible");
2394 /* Can combine stdcall with fastcall (redundant), regparm and
2396 else if (is_attribute_p ("stdcall", name))
2398 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2400 error ("stdcall and cdecl attributes are not compatible");
2402 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2404 error ("stdcall and fastcall attributes are not compatible");
2408 /* Can combine cdecl with regparm and sseregparm. */
2409 else if (is_attribute_p ("cdecl", name))
2411 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2413 error ("stdcall and cdecl attributes are not compatible");
2415 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2417 error ("fastcall and cdecl attributes are not compatible");
2421 /* Can combine sseregparm with all attributes. */
2426 /* Return 0 if the attributes for two types are incompatible, 1 if they
2427 are compatible, and 2 if they are nearly compatible (which causes a
2428 warning to be generated). */
2431 ix86_comp_type_attributes (tree type1, tree type2)
2433 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default convention flips, so the "odd one out" to
   check for is cdecl; otherwise it is stdcall.  */
2434 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2436 if (TREE_CODE (type1) != FUNCTION_TYPE)
2439 /* Check for mismatched fastcall/regparm types. */
/* The "!attr != !attr" idiom compares presence/absence of the attribute
   on the two types without caring about its value.  */
2440 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2441 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2442 || (ix86_function_regparm (type1, NULL)
2443 != ix86_function_regparm (type2, NULL)))
2446 /* Check for mismatched sseregparm types. */
2447 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2448 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2451 /* Check for mismatched return types (cdecl vs stdcall). */
2452 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2453 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2459 /* Return the regparm value for a function with the indicated TYPE and DECL.
2460 DECL may be NULL when calling function indirectly
2461 or considering a libcall. */
2464 ix86_function_regparm (tree type, tree decl)
2467 int regparm = ix86_regparm;
2468 bool user_convention = false;
/* An explicit regparm attribute overrides the -mregparm default.  */
2472 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2475 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2476 user_convention = true;
2479 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2482 user_convention = true;
2485 /* Use register calling convention for local functions when possible. */
/* Requires unit-at-a-time so cgraph knows the function is local, and is
   disabled under profiling (mcount clobbers registers).  */
2486 if (!TARGET_64BIT && !user_convention && decl
2487 && flag_unit_at_a_time && !profile_flag)
2489 struct cgraph_local_info *i = cgraph_local_info (decl);
2492 int local_regparm, globals = 0, regno;
2494 /* Make sure no regparm register is taken by a global register
2496 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2497 if (global_regs[local_regparm])
2499 /* We can't use regparm(3) for nested functions as these use
2500 static chain pointer in third argument. */
2501 if (local_regparm == 3
2502 && decl_function_context (decl)
2503 && !DECL_NO_STATIC_CHAIN (decl))
2505 /* Each global register variable increases register preassure,
2506 so the more global reg vars there are, the smaller regparm
2507 optimization use, unless requested by the user explicitly. */
2508 for (regno = 0; regno < 6; regno++)
2509 if (global_regs[regno])
2512 = globals < local_regparm ? local_regparm - globals : 0;
2514 if (local_regparm > regparm)
2515 regparm = local_regparm;
2522 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2523 in SSE registers for a function with the indicated TYPE and DECL.
2524 DECL may be NULL when calling function indirectly
2525 or considering a libcall. Otherwise return 0. */
2528 ix86_function_sseregparm (tree type, tree decl)
2530 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2531 by the sseregparm attribute. */
2532 if (TARGET_SSEREGPARM
2534 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE/SSE2 enabled is a hard error; report against
   the DECL when we have one, otherwise against the TYPE.  */
2539 error ("Calling %qD with attribute sseregparm without "
2540 "SSE/SSE2 enabled", decl);
2542 error ("Calling %qT with attribute sseregparm without "
2543 "SSE/SSE2 enabled", type);
2550 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2551 in SSE registers even for 32-bit mode and not just 3, but up to
2552 8 SSE arguments in registers. */
2553 if (!TARGET_64BIT && decl
2554 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2556 struct cgraph_local_info *i = cgraph_local_info (decl);
/* 2 = SFmode and DFmode in SSE regs (SSE2), 1 = SFmode only.  */
2558 return TARGET_SSE2 ? 2 : 1;
2564 /* Return true if EAX is live at the start of the function. Used by
2565 ix86_expand_prologue to determine if we need special help before
2566 calling allocate_stack_worker. */
2569 ix86_eax_live_at_start_p (void)
2571 /* Cheat. Don't bother working forward from ix86_function_regparm
2572 to the function type to whether an actual argument is located in
2573 eax. Instead just look at cfg info, which is still close enough
2574 to correct at this point. This gives false positives for broken
2575 functions that might use uninitialized data that happens to be
2576 allocated in eax, but who cares? */
/* Register 0 is AX in the i386 register numbering.  */
2577 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2580 /* Value is the number of bytes of arguments automatically
2581 popped when returning from a subroutine call.
2582 FUNDECL is the declaration node of the function (as a tree),
2583 FUNTYPE is the data type of the function (as a tree),
2584 or for a library call it is an identifier node for the subroutine name.
2585 SIZE is the number of bytes of arguments passed on the stack.
2587 On the 80386, the RTD insn may be used to pop them if the number
2588 of args is fixed, but if the number is variable then the caller
2589 must pop them all. RTD can't be used for library calls now
2590 because the library is compiled with the Unix compiler.
2591 Use of RTD is a selectable option, since it is incompatible with
2592 standard Unix calling sequences. If the option is not selected,
2593 the caller must always pop the args.
2595 The attribute stdcall is equivalent to RTD on a per module basis. */
2598 ix86_return_pops_args (tree fundecl, tree funtype, int size)
/* -mrtd applies only to real function decls, not library-call
   identifier nodes.  */
2600 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2602 /* Cdecl functions override -mrtd, and never pop the stack. */
2603 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2605 /* Stdcall and fastcall functions will pop the stack if not
2607 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2608 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* Callee pops only for fixed-argument functions (prototype list absent
   or ending in void, i.e. not variadic).  */
2612 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2613 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2614 == void_type_node)))
2618 /* Lose any fake structure return argument if it is passed on the stack. */
2619 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2621 && !KEEP_AGGREGATE_RETURN_POINTER)
2623 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden return pointer occupies one pointer-sized stack slot.  */
2626 return GET_MODE_SIZE (Pmode);
2632 /* Argument support functions. */
2634 /* Return true when register may be used to pass function parameters. */
2636 ix86_function_arg_regno_p (int regno)
/* 32-bit case (visible branch): integer regparm registers, plus MMX and
   SSE argument registers when those ISAs are enabled.  */
2640 return (regno < REGPARM_MAX
2641 || (TARGET_MMX && MMX_REGNO_P (regno)
2642 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2643 || (TARGET_SSE && SSE_REGNO_P (regno)
2644 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
/* 64-bit case: SSE argument registers, then the integer argument
   registers looked up by name in the psABI ordering table.  */
2646 if (TARGET_SSE && SSE_REGNO_P (regno)
2647 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2649 /* RAX is used as hidden argument to va_arg functions. */
2652 for (i = 0; i < REGPARM_MAX; i++)
2653 if (regno == x86_64_int_parameter_registers[i])
2658 /* Return if we do not know how to pass TYPE solely in registers. */
2661 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
/* Defer to the generic variable-size/padding test first.  */
2663 if (must_pass_in_stack_var_size_or_pad (mode, type))
2666 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2667 The layout_type routine is crafty and tries to trick us into passing
2668 currently unsupported vector types on the stack by using TImode. */
2669 return (!TARGET_64BIT && mode == TImode
2670 && type && TREE_CODE (type) != VECTOR_TYPE);
2673 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2674 for a call to a function whose data type is FNTYPE.
2675 For a library call, FNTYPE is 0. */
/* NOTE(review): elided listing -- the fndecl parameter line, *cum =
   zero_cum, and several braces are not visible here.  */
2678 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
2679 tree fntype, /* tree ptr for function decl */
2680 rtx libname, /* SYMBOL_REF of library name or 0 */
2683 static CUMULATIVE_ARGS zero_cum;
2684 tree param, next_param;
/* Optional tracing of the argument-passing setup under -mdebug-arg.  */
2686 if (TARGET_DEBUG_ARG)
2688 fprintf (stderr, "\ninit_cumulative_args (");
2690 fprintf (stderr, "fntype code = %s, ret code = %s",
2691 tree_code_name[(int) TREE_CODE (fntype)],
2692 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2694 fprintf (stderr, "no fntype");
2697 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2702 /* Set up the number of registers to use for passing arguments. */
2703 cum->nregs = ix86_regparm;
2705 cum->sse_nregs = SSE_REGPARM_MAX;
2707 cum->mmx_nregs = MMX_REGPARM_MAX;
2708 cum->warn_sse = true;
2709 cum->warn_mmx = true;
2710 cum->maybe_vaarg = false;
2712 /* Use ecx and edx registers if function has fastcall attribute,
2713 else look for regparm information. */
2714 if (fntype && !TARGET_64BIT)
2716 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2722 cum->nregs = ix86_function_regparm (fntype, fndecl);
2725 /* Set up the number of SSE registers used for passing SFmode
2726 and DFmode arguments. Warn for mismatching ABI. */
2727 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2729 /* Determine if this function has variable arguments. This is
2730 indicated by the last argument being 'void_type_mode' if there
2731 are no variable arguments. If there are variable arguments, then
2732 we won't pass anything in registers in 32-bit mode. */
2734 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
/* Walk the prototype's argument-type list; a list that does not end in
   void_type_node marks the function as variadic.  */
2736 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2737 param != 0; param = next_param)
2739 next_param = TREE_CHAIN (param);
2740 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2750 cum->float_in_sse = 0;
2752 cum->maybe_vaarg = true;
/* No prototype information at all: conservatively assume varargs.  */
2756 if ((!fntype && !libname)
2757 || (fntype && !TYPE_ARG_TYPES (fntype)))
2758 cum->maybe_vaarg = true;
2760 if (TARGET_DEBUG_ARG)
2761 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2766 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2767 But in the case of vector types, it is some vector mode.
2769 When we have only some of our vector isa extensions enabled, then there
2770 are some modes for which vector_mode_supported_p is false. For these
2771 modes, the generic vector support in gcc will choose some non-vector mode
2772 in order to implement the type. By computing the natural mode, we'll
2773 select the proper ABI location for the operand and not depend on whatever
2774 the middle-end decides to do with these vector types. */
2776 static enum machine_mode
2777 type_natural_mode (tree type)
2779 enum machine_mode mode = TYPE_MODE (type);
2781 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2783 HOST_WIDE_INT size = int_size_in_bytes (type);
/* Only 8- and 16-byte vectors with more than one element get a real
   vector mode; everything else keeps the mode the middle-end chose.  */
2784 if ((size == 8 || size == 16)
2785 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2786 && TYPE_VECTOR_SUBPARTS (type) > 1)
2788 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start scanning from the narrowest vector mode of the matching
   element class (float vs. integer).  */
2790 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2791 mode = MIN_MODE_VECTOR_FLOAT;
2793 mode = MIN_MODE_VECTOR_INT;
2795 /* Get the mode which has this inner mode and number of units. */
2796 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2797 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2798 && GET_MODE_INNER (mode) == innermode)
2808 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2809 this may not agree with the mode that the type system has chosen for the
2810 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2811 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2814 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
/* Non-BLKmode: the type system's mode is usable directly for the
   hard register.  */
2819 if (orig_mode != BLKmode)
2820 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap a register in natural mode MODE inside a one-entry
   PARALLEL at offset 0, as described in the comment above.  */
2823 tmp = gen_rtx_REG (mode, regno);
2824 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2825 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2831 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2832 of this code is to classify each 8bytes of incoming argument by the register
2833 class and assign registers accordingly. */
2835 /* Return the union class of CLASS1 and CLASS2.
2836 See the x86-64 PS ABI for details. */
2838 static enum x86_64_reg_class
2839 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2841 /* Rule #1: If both classes are equal, this is the resulting class. */
2842 if (class1 == class2)
2845 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2847 if (class1 == X86_64_NO_CLASS)
2849 if (class2 == X86_64_NO_CLASS)
2852 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2853 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2854 return X86_64_MEMORY_CLASS;
2856 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI merged with SSESF keeps the 32-bit INTEGERSI class; any
   other combination involving INTEGER/INTEGERSI widens to INTEGER.  */
2857 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2858 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2859 return X86_64_INTEGERSI_CLASS;
2860 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2861 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2862 return X86_64_INTEGER_CLASS;
2864 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2866 if (class1 == X86_64_X87_CLASS
2867 || class1 == X86_64_X87UP_CLASS
2868 || class1 == X86_64_COMPLEX_X87_CLASS
2869 || class2 == X86_64_X87_CLASS
2870 || class2 == X86_64_X87UP_CLASS
2871 || class2 == X86_64_COMPLEX_X87_CLASS)
2872 return X86_64_MEMORY_CLASS;
2874 /* Rule #6: Otherwise class SSE is used. */
2875 return X86_64_SSE_CLASS;
2878 /* Classify the argument of type TYPE and mode MODE.
2879 CLASSES will be filled by the register class used to pass each word
2880 of the operand. The number of words is returned. In case the parameter
2881 should be passed in memory, 0 is returned. As a special case for zero
2882 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2884 BIT_OFFSET is used internally for handling records and specifies offset
2885 of the offset in bits modulo 256 to avoid overflow cases.
2887 See the x86-64 PS ABI for details.
/* NOTE(review): this excerpt is line-sampled; several statements
   (braces, case labels, returns) are not visible here.  Comments below
   describe only what the visible lines establish.  */
2891 classify_argument (enum machine_mode mode, tree type,
2892 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2894 HOST_WIDE_INT bytes =
2895 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of 8-byte words the argument occupies, taking the partial
   first word implied by BIT_OFFSET into account.  */
2896 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2898 /* Variable sized entities are always passed/returned in memory. */
2902 if (mode != VOIDmode
2903 && targetm.calls.must_pass_in_stack (mode, type))
2906 if (type && AGGREGATE_TYPE_P (type))
2910 enum x86_64_reg_class subclasses[MAX_CLASSES];
2912 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2916 for (i = 0; i < words; i++)
2917 classes[i] = X86_64_NO_CLASS;
2919 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2920 signalize memory class, so handle it as special case. */
2923 classes[0] = X86_64_NO_CLASS;
2927 /* Classify each field of record and merge classes. */
2928 switch (TREE_CODE (type))
2931 /* For classes first merge in the field of the subclasses. */
2932 if (TYPE_BINFO (type))
2934 tree binfo, base_binfo;
2937 for (binfo = TYPE_BINFO (type), basenum = 0;
2938 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2941 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2942 tree type = BINFO_TYPE (base_binfo);
/* Recursively classify each base class at its bit offset; offsets are
   taken modulo 256 per the function comment above.  */
2944 num = classify_argument (TYPE_MODE (type),
2946 (offset + bit_offset) % 256);
2949 for (i = 0; i < num; i++)
2951 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2953 merge_classes (subclasses[i], classes[i + pos]);
2957 /* And now merge the fields of structure. */
2958 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2960 if (TREE_CODE (field) == FIELD_DECL)
2964 if (TREE_TYPE (field) == error_mark_node)
2967 /* Bitfields are always classified as integer. Handle them
2968 early, since later code would consider them to be
2969 misaligned integers. */
2970 if (DECL_BIT_FIELD (field))
2972 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2973 i < ((int_bit_position (field) + (bit_offset % 64))
2974 + tree_low_cst (DECL_SIZE (field), 0)
2977 merge_classes (X86_64_INTEGER_CLASS,
2982 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2983 TREE_TYPE (field), subclasses,
2984 (int_bit_position (field)
2985 + bit_offset) % 256);
2988 for (i = 0; i < num; i++)
2991 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2993 merge_classes (subclasses[i], classes[i + pos]);
3001 /* Arrays are handled as small records. */
3004 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3005 TREE_TYPE (type), subclasses, bit_offset);
3009 /* The partial classes are now full classes. */
3010 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3011 subclasses[0] = X86_64_SSE_CLASS;
3012 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3013 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all words of the
   array.  */
3015 for (i = 0; i < words; i++)
3016 classes[i] = subclasses[i % num];
3021 case QUAL_UNION_TYPE:
3022 /* Unions are similar to RECORD_TYPE but offset is always 0.
3025 /* Unions are not derived. */
3026 gcc_assert (!TYPE_BINFO (type)
3027 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3028 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3030 if (TREE_CODE (field) == FIELD_DECL)
3033 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3034 TREE_TYPE (field), subclasses,
3038 for (i = 0; i < num; i++)
3039 classes[i] = merge_classes (subclasses[i], classes[i]);
3048 /* Final merger cleanup. */
3049 for (i = 0; i < words; i++)
3051 /* If one class is MEMORY, everything should be passed in
3053 if (classes[i] == X86_64_MEMORY_CLASS)
3056 /* The X86_64_SSEUP_CLASS should be always preceded by
3057 X86_64_SSE_CLASS. */
3058 if (classes[i] == X86_64_SSEUP_CLASS
3059 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3060 classes[i] = X86_64_SSE_CLASS;
3062 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3063 if (classes[i] == X86_64_X87UP_CLASS
3064 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3065 classes[i] = X86_64_SSE_CLASS;
3070 /* Compute alignment needed. We align all types to natural boundaries with
3071 exception of XFmode that is aligned to 64bits. */
3072 if (mode != VOIDmode && mode != BLKmode)
3074 int mode_alignment = GET_MODE_BITSIZE (mode);
3077 mode_alignment = 128;
3078 else if (mode == XCmode)
3079 mode_alignment = 256;
3080 if (COMPLEX_MODE_P (mode))
3081 mode_alignment /= 2;
3082 /* Misaligned fields are always returned in memory. */
3083 if (bit_offset % mode_alignment)
3087 /* for V1xx modes, just use the base mode */
3088 if (VECTOR_MODE_P (mode)
3089 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3090 mode = GET_MODE_INNER (mode);
3092 /* Classification of atomic types. */
3097 classes[0] = X86_64_SSE_CLASS;
3100 classes[0] = X86_64_SSE_CLASS;
3101 classes[1] = X86_64_SSEUP_CLASS;
/* Small integers that fit entirely within the low 32 bits of a word
   are INTEGERSI; otherwise full INTEGER.  */
3110 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3111 classes[0] = X86_64_INTEGERSI_CLASS;
3113 classes[0] = X86_64_INTEGER_CLASS;
3117 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3122 if (!(bit_offset % 64))
3123 classes[0] = X86_64_SSESF_CLASS;
3125 classes[0] = X86_64_SSE_CLASS;
3128 classes[0] = X86_64_SSEDF_CLASS;
3131 classes[0] = X86_64_X87_CLASS;
3132 classes[1] = X86_64_X87UP_CLASS;
3135 classes[0] = X86_64_SSE_CLASS;
3136 classes[1] = X86_64_SSEUP_CLASS;
3139 classes[0] = X86_64_SSE_CLASS;
3142 classes[0] = X86_64_SSEDF_CLASS;
3143 classes[1] = X86_64_SSEDF_CLASS;
3146 classes[0] = X86_64_COMPLEX_X87_CLASS;
3149 /* This modes is larger than 16 bytes. */
3157 classes[0] = X86_64_SSE_CLASS;
3158 classes[1] = X86_64_SSEUP_CLASS;
3164 classes[0] = X86_64_SSE_CLASS;
3170 gcc_assert (VECTOR_MODE_P (mode));
3175 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3177 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3178 classes[0] = X86_64_INTEGERSI_CLASS;
3180 classes[0] = X86_64_INTEGER_CLASS;
3181 classes[1] = X86_64_INTEGER_CLASS;
/* One word for up-to-8-byte vectors, two for larger ones.  */
3182 return 1 + (bytes > 8);
3186 /* Examine the argument and return set number of register required in each
3187 class. Return 0 iff parameter should be passed in memory. */
3189 examine_argument (enum machine_mode mode, tree type, int in_return,
3190 int *int_nregs, int *sse_nregs)
3192 enum x86_64_reg_class class[MAX_CLASSES];
3193 int n = classify_argument (mode, type, class, 0);
/* Walk the classified words and tally register needs per class.  */
3199 for (n--; n >= 0; n--)
3202 case X86_64_INTEGER_CLASS:
3203 case X86_64_INTEGERSI_CLASS:
3206 case X86_64_SSE_CLASS:
3207 case X86_64_SSESF_CLASS:
3208 case X86_64_SSEDF_CLASS:
3211 case X86_64_NO_CLASS:
3212 case X86_64_SSEUP_CLASS:
3214 case X86_64_X87_CLASS:
3215 case X86_64_X87UP_CLASS:
/* X87-class values are only valid as return values; as arguments
   they force memory passing (return 0).  */
3219 case X86_64_COMPLEX_X87_CLASS:
3220 return in_return ? 2 : 0;
3221 case X86_64_MEMORY_CLASS:
3227 /* Construct container for the argument used by GCC interface. See
3228 FUNCTION_ARG for the detailed description. */
/* NOTE(review): line-sampled excerpt; some statements are not visible.
   Comments describe only what the visible lines establish.  */
3231 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3232 tree type, int in_return, int nintregs, int nsseregs,
3233 const int *intreg, int sse_regno)
3235 enum machine_mode tmpmode;
3237 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3238 enum x86_64_reg_class class[MAX_CLASSES];
3242 int needed_sseregs, needed_intregs;
3243 rtx exp[MAX_CLASSES];
3246 n = classify_argument (mode, type, class, 0);
3247 if (TARGET_DEBUG_ARG)
3250 fprintf (stderr, "Memory class\n");
3253 fprintf (stderr, "Classes:");
3254 for (i = 0; i < n; i++)
3256 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3258 fprintf (stderr, "\n");
/* Bail out (pass in memory) when classification fails or not enough
   registers remain to pass the whole value in registers.  */
3263 if (!examine_argument (mode, type, in_return, &needed_intregs,
3266 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3269 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3270 some less clueful developer tries to use floating-point anyway. */
3271 if (needed_sseregs && !TARGET_SSE)
3273 static bool issued_error;
3276 issued_error = true;
3278 error ("SSE register return with SSE disabled");
3280 error ("SSE register argument with SSE disabled");
3285 /* First construct simple cases. Avoid SCmode, since we want to use
3286 single register to pass this type. */
3287 if (n == 1 && mode != SCmode)
3290 case X86_64_INTEGER_CLASS:
3291 case X86_64_INTEGERSI_CLASS:
3292 return gen_rtx_REG (mode, intreg[0]);
3293 case X86_64_SSE_CLASS:
3294 case X86_64_SSESF_CLASS:
3295 case X86_64_SSEDF_CLASS:
3296 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3297 case X86_64_X87_CLASS:
3298 case X86_64_COMPLEX_X87_CLASS:
3299 return gen_rtx_REG (mode, FIRST_STACK_REG);
3300 case X86_64_NO_CLASS:
3301 /* Zero sized array, struct or class. */
/* Two-word special cases that map onto a single wide register.  */
3306 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3308 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3310 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3311 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3312 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3313 && class[1] == X86_64_INTEGER_CLASS
3314 && (mode == CDImode || mode == TImode || mode == TFmode)
3315 && intreg[0] + 1 == intreg[1])
3316 return gen_rtx_REG (mode, intreg[0]);
3318 /* Otherwise figure out the entries of the PARALLEL. */
3319 for (i = 0; i < n; i++)
3323 case X86_64_NO_CLASS:
3325 case X86_64_INTEGER_CLASS:
3326 case X86_64_INTEGERSI_CLASS:
3327 /* Merge TImodes on aligned occasions here too. */
3328 if (i * 8 + 8 > bytes)
3329 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3330 else if (class[i] == X86_64_INTEGERSI_CLASS)
3334 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3335 if (tmpmode == BLKmode)
3337 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3338 gen_rtx_REG (tmpmode, *intreg),
3342 case X86_64_SSESF_CLASS:
3343 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3344 gen_rtx_REG (SFmode,
3345 SSE_REGNO (sse_regno)),
3349 case X86_64_SSEDF_CLASS:
3350 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3351 gen_rtx_REG (DFmode,
3352 SSE_REGNO (sse_regno)),
3356 case X86_64_SSE_CLASS:
/* SSE followed by SSEUP consumes both words in one 16-byte
   register.  */
3357 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3361 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3362 gen_rtx_REG (tmpmode,
3363 SSE_REGNO (sse_regno)),
3365 if (tmpmode == TImode)
3374 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LISTs into the final PARALLEL.  */
3378 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3379 for (i = 0; i < nexps; i++)
3380 XVECEXP (ret, 0, i) = exp [i];
3384 /* Update the data in CUM to advance over an argument
3385 of mode MODE and data type TYPE.
3386 (TYPE is null for libcalls where that information may not be available.) */
3389 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3390 tree type, int named)
3393 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3394 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3397 mode = type_natural_mode (type);
3399 if (TARGET_DEBUG_ARG)
3400 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3401 "mode=%s, named=%d)\n\n",
3402 words, cum->words, cum->nregs, cum->sse_nregs,
3403 GET_MODE_NAME (mode), named);
/* 64-bit path (presumably guarded by TARGET_64BIT in a line not
   visible here): consume registers per classification, or fall back
   to stack words.  */
3407 int int_nregs, sse_nregs;
3408 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3409 cum->words += words;
3410 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3412 cum->nregs -= int_nregs;
3413 cum->sse_nregs -= sse_nregs;
3414 cum->regno += int_nregs;
3415 cum->sse_regno += sse_nregs;
3418 cum->words += words;
/* 32-bit path: integer words consume general registers.  */
3436 cum->words += words;
3437 cum->nregs -= words;
3438 cum->regno += words;
3440 if (cum->nregs <= 0)
3448 if (cum->float_in_sse < 2)
3451 if (cum->float_in_sse < 1)
/* SSE vector arguments: advance the SSE register cursor.  */
3462 if (!type || !AGGREGATE_TYPE_P (type))
3464 cum->sse_words += words;
3465 cum->sse_nregs -= 1;
3466 cum->sse_regno += 1;
3467 if (cum->sse_nregs <= 0)
/* MMX vector arguments: advance the MMX register cursor.  */
3479 if (!type || !AGGREGATE_TYPE_P (type))
3481 cum->mmx_words += words;
3482 cum->mmx_nregs -= 1;
3483 cum->mmx_regno += 1;
3484 if (cum->mmx_nregs <= 0)
3495 /* Define where to put the arguments to a function.
3496 Value is zero to push the argument on the stack,
3497 or a hard register in which to store the argument.
3499 MODE is the argument's machine mode.
3500 TYPE is the data type of the argument (as a tree).
3501 This is null for libcalls where that information may
3503 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3504 the preceding args and about the function being called.
3505 NAMED is nonzero if this argument is a named parameter
3506 (otherwise it is an extra parameter matching an ellipsis). */
3509 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3510 tree type, int named)
3512 enum machine_mode mode = orig_mode;
3515 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3516 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Warned-once flags for the SSE/MMX-disabled diagnostics below.  */
3517 static bool warnedsse, warnedmmx;
3519 /* To simplify the code below, represent vector types with a vector mode
3520 even if MMX/SSE are not active. */
3521 if (type && TREE_CODE (type) == VECTOR_TYPE)
3522 mode = type_natural_mode (type);
3524 /* Handle a hidden AL argument containing number of registers for varargs
3525 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3527 if (mode == VOIDmode)
3530 return GEN_INT (cum->maybe_vaarg
3531 ? (cum->sse_nregs < 0
/* 64-bit path: delegate to the x86-64 container builder.  */
3539 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3541 &x86_64_int_parameter_registers [cum->regno],
3546 /* For now, pass fp/complex values on the stack. */
3558 if (words <= cum->nregs)
3560 int regno = cum->regno;
3562 /* Fastcall allocates the first two DWORD (SImode) or
3563 smaller arguments to ECX and EDX. */
3566 if (mode == BLKmode || mode == DImode)
3569 /* ECX not EAX is the first allocated register. */
3573 ret = gen_rtx_REG (mode, regno);
3577 if (cum->float_in_sse < 2)
3580 if (cum->float_in_sse < 1)
/* 16-byte (SSE) vector arguments in XMM registers; warn once when SSE
   is disabled but the type demands it.  */
3590 if (!type || !AGGREGATE_TYPE_P (type))
3592 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3595 warning (0, "SSE vector argument without SSE enabled "
3599 ret = gen_reg_or_parallel (mode, orig_mode,
3600 cum->sse_regno + FIRST_SSE_REG);
/* 8-byte (MMX) vector arguments in MMX registers; warn once when MMX
   is disabled but the type demands it.  */
3607 if (!type || !AGGREGATE_TYPE_P (type))
3609 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3612 warning (0, "MMX vector argument without MMX enabled "
3616 ret = gen_reg_or_parallel (mode, orig_mode,
3617 cum->mmx_regno + FIRST_MMX_REG);
3622 if (TARGET_DEBUG_ARG)
3625 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3626 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3629 print_simple_rtl (stderr, ret);
3631 fprintf (stderr, ", stack");
3633 fprintf (stderr, " )\n");
3639 /* A C expression that indicates when an argument must be passed by
3640 reference. If nonzero for an argument, a copy of that argument is
3641 made in memory and a pointer to the argument is passed instead of
3642 the argument itself. The pointer is passed in whatever way is
3643 appropriate for passing a pointer to that type. */
3646 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3647 enum machine_mode mode ATTRIBUTE_UNUSED,
3648 tree type, bool named ATTRIBUTE_UNUSED)
/* Variable-sized types (int_size_in_bytes == -1) are passed by
   reference.  */
3653 if (type && int_size_in_bytes (type) == -1)
3655 if (TARGET_DEBUG_ARG)
3656 fprintf (stderr, "function_arg_pass_by_reference\n");
3663 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3664 ABI. Only called if TARGET_SSE. */
3666 contains_128bit_aligned_vector_p (tree type)
3668 enum machine_mode mode = TYPE_MODE (type);
/* A direct SSE-mode type counts unless the user explicitly lowered its
   alignment below 128 bits.  */
3669 if (SSE_REG_MODE_P (mode)
3670 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3672 if (TYPE_ALIGN (type) < 128)
3675 if (AGGREGATE_TYPE_P (type))
3677 /* Walk the aggregates recursively. */
3678 switch (TREE_CODE (type))
3682 case QUAL_UNION_TYPE:
3686 if (TYPE_BINFO (type))
3688 tree binfo, base_binfo;
3691 for (binfo = TYPE_BINFO (type), i = 0;
3692 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3693 if (contains_128bit_aligned_vector_p
3694 (BINFO_TYPE (base_binfo)))
3697 /* And now merge the fields of structure. */
3698 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3700 if (TREE_CODE (field) == FIELD_DECL
3701 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3708 /* Just for use if some languages passes arrays by value. */
3709 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3720 /* Gives the alignment boundary, in bits, of an argument with the
3721 specified mode and type. */
3724 ix86_function_arg_boundary (enum machine_mode mode, tree type)
/* Use the type's alignment when available, else the mode's, and never
   less than PARM_BOUNDARY.  */
3728 align = TYPE_ALIGN (type);
3730 align = GET_MODE_ALIGNMENT (mode);
3731 if (align < PARM_BOUNDARY)
3732 align = PARM_BOUNDARY;
3735 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3736 make an exception for SSE modes since these require 128bit
3739 The handling here differs from field_alignment. ICC aligns MMX
3740 arguments to 4 byte boundaries, while structure fields are aligned
3741 to 8 byte boundaries. */
3743 align = PARM_BOUNDARY;
3746 if (!SSE_REG_MODE_P (mode))
3747 align = PARM_BOUNDARY;
/* Aggregates only keep 128-bit alignment when they actually contain a
   128-bit aligned vector.  */
3751 if (!contains_128bit_aligned_vector_p (type))
3752 align = PARM_BOUNDARY;
3760 /* Return true if N is a possible register number of function value. */
3762 ix86_function_value_regno_p (int regno)
/* %eax always; %st(0) when fp returns go in the 80387; %xmm0 with SSE;
   %mm0 with MMX (second condition group below).  */
3765 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3766 || (regno == FIRST_SSE_REG && TARGET_SSE))
3770 && (regno == FIRST_MMX_REG && TARGET_MMX))
3776 /* Define how to find the value returned by a function.
3777 VALTYPE is the data type of the value (as a tree).
3778 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3779 otherwise, FUNC is 0. */
3781 ix86_function_value (tree valtype, tree fntype_or_decl,
3782 bool outgoing ATTRIBUTE_UNUSED)
3784 enum machine_mode natmode = type_natural_mode (valtype);
/* 64-bit: build the return-value container per the x86-64 ABI.  */
3788 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3789 1, REGPARM_MAX, SSE_REGPARM_MAX,
3790 x86_64_int_return_registers, 0);
3791 /* For zero sized structures, construct_container return NULL, but we
3792 need to keep rest of compiler happy by returning meaningful value. */
3794 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
/* 32-bit: FNTYPE_OR_DECL may be either a decl or a type; normalize
   before picking the return register.  */
3799 tree fn = NULL_TREE, fntype;
3801 && DECL_P (fntype_or_decl))
3802 fn = fntype_or_decl;
3803 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3804 return gen_rtx_REG (TYPE_MODE (valtype),
3805 ix86_value_regno (natmode, fn, fntype));
3809 /* Return true iff type is returned in memory. */
3811 ix86_return_in_memory (tree type)
3813 int needed_intregs, needed_sseregs, size;
3814 enum machine_mode mode = type_natural_mode (type);
/* 64-bit: memory iff the classification says no registers suffice.  */
3817 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3819 if (mode == BLKmode)
3822 size = int_size_in_bytes (type);
3824 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3827 if (VECTOR_MODE_P (mode) || mode == TImode)
3829 /* User-created vectors small enough to fit in EAX. */
3833 /* MMX/3dNow values are returned in MM0,
3834 except when it doesn't exits. */
3836 return (TARGET_MMX ? 0 : 1);
3838 /* SSE values are returned in XMM0, except when it doesn't exist. */
3840 return (TARGET_SSE ? 0 : 1);
3854 /* When returning SSE vector types, we have a choice of either
3855 (1) being abi incompatible with a -march switch, or
3856 (2) generating an error.
3857 Given no good solution, I think the safest thing is one warning.
3858 The user won't be able to use -Werror, but....
3860 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3861 called in response to actually generating a caller or callee that
3862 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3863 via aggregate_value_p for general type probing from tree-ssa. */
3866 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* One-shot warning latches, matching the strategy described above.  */
3868 static bool warnedsse, warnedmmx;
3872 /* Look at the return type of the function, not the function type. */
3873 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3875 if (!TARGET_SSE && !warnedsse)
3878 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3881 warning (0, "SSE vector return without SSE enabled "
3886 if (!TARGET_MMX && !warnedmmx)
3888 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3891 warning (0, "MMX vector return without MMX enabled "
3900 /* Define how to find the value returned by a library function
3901 assuming the value has mode MODE. */
3903 ix86_libcall_value (enum machine_mode mode)
/* Mode-based dispatch (switch not fully visible in this excerpt):
   SSE modes in %xmm0, x87 float modes in %st(0), integers in %eax,
   and the general 32-bit fallback via ix86_value_regno.  */
3917 return gen_rtx_REG (mode, FIRST_SSE_REG);
3920 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3924 return gen_rtx_REG (mode, 0);
3928 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3931 /* Given a mode, return the register to use for a return value. */
3934 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3936 gcc_assert (!TARGET_64BIT);
3938 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3939 we prevent this case when mmx is not available. */
3940 if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
3941 return FIRST_MMX_REG;
3943 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3944 we prevent this case when sse is not available. */
3945 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3946 return FIRST_SSE_REG;
3948 /* Decimal floating point values can go in %eax, unlike other float modes. */
3949 if (DECIMAL_FLOAT_MODE_P (mode))
3952 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3953 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
3956 /* Floating point return values in %st(0), except for local functions when
3957 SSE math is enabled or for functions with sseregparm attribute. */
3958 if ((func || fntype)
3959 && (mode == SFmode || mode == DFmode))
/* sseregparm level 1 covers SFmode; level 2 additionally DFmode.  */
3961 int sse_level = ix86_function_sseregparm (fntype, func);
3962 if ((sse_level >= 1 && mode == SFmode)
3963 || (sse_level == 2 && mode == DFmode))
3964 return FIRST_SSE_REG;
3967 return FIRST_FLOAT_REG;
3970 /* Create the va_list data type. */
3973 ix86_build_builtin_va_list (void)
3975 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3977 /* For i386 we use plain pointer to argument area. */
3979 return build_pointer_type (char_type_node);
/* x86-64: build the four-field __va_list_tag record
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area).  */
3981 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3982 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3984 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3985 unsigned_type_node);
3986 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3987 unsigned_type_node);
3988 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3990 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3993 va_list_gpr_counter_field = f_gpr;
3994 va_list_fpr_counter_field = f_fpr;
3996 DECL_FIELD_CONTEXT (f_gpr) = record;
3997 DECL_FIELD_CONTEXT (f_fpr) = record;
3998 DECL_FIELD_CONTEXT (f_ovf) = record;
3999 DECL_FIELD_CONTEXT (f_sav) = record;
4001 TREE_CHAIN (record) = type_decl;
4002 TYPE_NAME (record) = type_decl;
4003 TYPE_FIELDS (record) = f_gpr;
4004 TREE_CHAIN (f_gpr) = f_fpr;
4005 TREE_CHAIN (f_fpr) = f_ovf;
4006 TREE_CHAIN (f_ovf) = f_sav;
4008 layout_type (record);
4010 /* The correct type is an array type of one element. */
4011 return build_array_type (record, build_index_type (size_zero_node));
4014 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* NOTE(review): line-sampled excerpt; some statements are missing.  */
4017 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4018 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4021 CUMULATIVE_ARGS next_cum;
4022 rtx save_area = NULL_RTX, mem;
/* Nothing to do when va_list reads neither GP nor FP registers.  */
4035 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4038 /* Indicate to allocate space on the stack for varargs save area. */
4039 ix86_save_varrargs_registers = 1;
4041 cfun->stack_alignment_needed = 128;
4043 fntype = TREE_TYPE (current_function_decl);
4044 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4045 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4046 != void_type_node));
4048 /* For varargs, we do not want to skip the dummy va_dcl argument.
4049 For stdargs, we do want to skip the last named argument. */
4052 function_arg_advance (&next_cum, mode, type, 1);
4055 save_area = frame_pointer_rtx;
4057 set = get_varargs_alias_set ();
/* Spill the remaining unnamed GP parameter registers into the save
   area, bounded by how much of it va_list actually reads.  */
4059 for (i = next_cum.regno;
4061 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4064 mem = gen_rtx_MEM (Pmode,
4065 plus_constant (save_area, i * UNITS_PER_WORD));
4066 MEM_NOTRAP_P (mem) = 1;
4067 set_mem_alias_set (mem, set);
4068 emit_move_insn (mem, gen_rtx_REG (Pmode,
4069 x86_64_int_parameter_registers[i]));
4072 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4074 /* Now emit code to save SSE registers. The AX parameter contains number
4075 of SSE parameter registers used to call this function. We use
4076 sse_prologue_save insn template that produces computed jump across
4077 SSE saves. We need some preparation work to get this working. */
4079 label = gen_label_rtx ();
4080 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4082 /* Compute address to jump to :
4083 label - 5*eax + nnamed_sse_arguments*5 */
4084 tmp_reg = gen_reg_rtx (Pmode);
4085 nsse_reg = gen_reg_rtx (Pmode);
4086 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4087 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4088 gen_rtx_MULT (Pmode, nsse_reg,
4090 if (next_cum.sse_regno)
4093 gen_rtx_CONST (DImode,
4094 gen_rtx_PLUS (DImode,
4096 GEN_INT (next_cum.sse_regno * 4))));
4098 emit_move_insn (nsse_reg, label_ref);
4099 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4101 /* Compute address of memory block we save into. We always use pointer
4102 pointing 127 bytes after first byte to store - this is needed to keep
4103 instruction size limited by 4 bytes. */
4104 tmp_reg = gen_reg_rtx (Pmode);
4105 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4106 plus_constant (save_area,
4107 8 * REGPARM_MAX + 127)));
4108 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4109 MEM_NOTRAP_P (mem) = 1;
4110 set_mem_alias_set (mem, set);
4111 set_mem_align (mem, BITS_PER_WORD);
4113 /* And finally do the dirty job! */
4114 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4115 GEN_INT (next_cum.sse_regno), label));
4120 /* Implement va_start. */
4123 ix86_va_start (tree valist, rtx nextarg)
4125 HOST_WIDE_INT words, n_gpr, n_fpr;
4126 tree f_gpr, f_fpr, f_ovf, f_sav;
4127 tree gpr, fpr, ovf, sav, t;
4130 /* Only 64bit target needs something special. */
4133 std_expand_builtin_va_start (valist, nextarg);
/* Walk the __va_list_tag fields in declaration order:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
4137 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4138 f_fpr = TREE_CHAIN (f_gpr);
4139 f_ovf = TREE_CHAIN (f_fpr);
4140 f_sav = TREE_CHAIN (f_ovf);
4142 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4143 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4144 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4145 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4146 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4148 /* Count number of gp and fp argument registers used. */
4149 words = current_function_args_info.words;
4150 n_gpr = current_function_args_info.regno;
4151 n_fpr = current_function_args_info.sse_regno;
4153 if (TARGET_DEBUG_ARG)
4154 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4155 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset counts 8-byte GP slots already consumed.  */
4157 if (cfun->va_list_gpr_size)
4159 type = TREE_TYPE (gpr);
4160 t = build2 (MODIFY_EXPR, type, gpr,
4161 build_int_cst (type, n_gpr * 8));
4162 TREE_SIDE_EFFECTS (t) = 1;
4163 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset: SSE slots are 16 bytes each and follow the
   REGPARM_MAX * 8 bytes of GP save slots.  */
4166 if (cfun->va_list_fpr_size)
4168 type = TREE_TYPE (fpr);
4169 t = build2 (MODIFY_EXPR, type, fpr,
4170 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4171 TREE_SIDE_EFFECTS (t) = 1;
4172 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4175 /* Find the overflow area. */
4176 type = TREE_TYPE (ovf);
4177 t = make_tree (type, virtual_incoming_args_rtx);
4179 t = build2 (PLUS_EXPR, type, t,
4180 build_int_cst (type, words * UNITS_PER_WORD));
4181 t = build2 (MODIFY_EXPR, type, ovf, t);
4182 TREE_SIDE_EFFECTS (t) = 1;
4183 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4185 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4187 /* Find the register save area.
4188 Prologue of the function save it right above stack frame. */
4189 type = TREE_TYPE (sav);
4190 t = make_tree (type, frame_pointer_rtx);
4191 t = build2 (MODIFY_EXPR, type, sav, t);
4192 TREE_SIDE_EFFECTS (t) = 1;
4193 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4197 /* Implement va_arg. */
/* Implement va_arg for the x86-64 ABI: gimplify code that fetches the next
   variadic argument either from the register save area (GP regs at 8-byte
   slots, SSE regs at 16-byte slots) or from the stack overflow area.
   VALIST is the va_list expression, TYPE the argument type; statements are
   appended to *PRE_P / *POST_P.  32-bit targets fall back to the generic
   expander.  NOTE(review): this extract has gaps — several original lines
   (braces, else-arms, declarations) are not visible here.  */
4200 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4202 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4203 tree f_gpr, f_fpr, f_ovf, f_sav;
4204 tree gpr, fpr, ovf, sav, t;
4206 tree lab_false, lab_over = NULL_TREE;
4211 enum machine_mode nat_mode;
4213 /* Only 64bit target needs something special. */
4215 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Pick apart the four fields of the x86-64 va_list structure:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
4217 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4218 f_fpr = TREE_CHAIN (f_gpr);
4219 f_ovf = TREE_CHAIN (f_fpr);
4220 f_sav = TREE_CHAIN (f_ovf);
4222 valist = build_va_arg_indirect_ref (valist);
4223 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4224 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4225 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4226 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference are fetched as a pointer and
   dereferenced at the end (see the indirect_p use at the return).  */
4228 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4230 type = build_pointer_type (type);
4231 size = int_size_in_bytes (type);
4232 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4234 nat_mode = type_natural_mode (type);
4235 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4236 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4238 /* Pull the value out of the saved registers. */
4240 addr = create_tmp_var (ptr_type_node, "addr");
4241 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4245 int needed_intregs, needed_sseregs;
4247 tree int_addr, sse_addr;
4249 lab_false = create_artificial_label ();
4250 lab_over = create_artificial_label ();
4252 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary copy is needed when the value cannot be read directly
   from the save area, e.g. because its alignment exceeds the slot
   alignment (8 bytes for GP slots, 16 for SSE slots).  */
4254 need_temp = (!REG_P (container)
4255 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4256 || TYPE_ALIGN (type) > 128));
4258 /* In case we are passing structure, verify that it is consecutive block
4259 on the register save area. If not we need to do moves. */
4260 if (!need_temp && !REG_P (container))
4262 /* Verify that all registers are strictly consecutive */
4263 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4267 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4269 rtx slot = XVECEXP (container, 0, i);
4270 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4271 || INTVAL (XEXP (slot, 1)) != i * 16)
4279 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4281 rtx slot = XVECEXP (container, 0, i);
4282 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4283 || INTVAL (XEXP (slot, 1)) != i * 8)
4295 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4296 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4297 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4298 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4301 /* First ensure that we fit completely in registers. */
/* If gp_offset >= (REGPARM_MAX - needed + 1) * 8 there are not enough
   GP registers left; branch to the overflow-area path.  */
4304 t = build_int_cst (TREE_TYPE (gpr),
4305 (REGPARM_MAX - needed_intregs + 1) * 8);
4306 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4307 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4308 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4309 gimplify_and_add (t, pre_p);
/* Same exhaustion check for the SSE registers (16-byte slots).  */
4313 t = build_int_cst (TREE_TYPE (fpr),
4314 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4316 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4317 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4318 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4319 gimplify_and_add (t, pre_p);
4322 /* Compute index to start of area used for integer regs. */
4325 /* int_addr = gpr + sav; */
4326 t = fold_convert (ptr_type_node, gpr);
4327 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4328 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4329 gimplify_and_add (t, pre_p);
4333 /* sse_addr = fpr + sav; */
4334 t = fold_convert (ptr_type_node, fpr);
4335 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4336 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4337 gimplify_and_add (t, pre_p);
/* need_temp path: copy the value piecewise from the save area into a
   stack temporary, then read it from there.  */
4342 tree temp = create_tmp_var (type, "va_arg_tmp");
4345 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4346 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4347 gimplify_and_add (t, pre_p);
4349 for (i = 0; i < XVECLEN (container, 0); i++)
4351 rtx slot = XVECEXP (container, 0, i);
4352 rtx reg = XEXP (slot, 0);
4353 enum machine_mode mode = GET_MODE (reg);
4354 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4355 tree addr_type = build_pointer_type (piece_type);
4358 tree dest_addr, dest;
4360 if (SSE_REGNO_P (REGNO (reg)))
4362 src_addr = sse_addr;
4363 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4367 src_addr = int_addr;
4368 src_offset = REGNO (reg) * 8;
4370 src_addr = fold_convert (addr_type, src_addr);
4371 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4372 size_int (src_offset)));
4373 src = build_va_arg_indirect_ref (src_addr);
4375 dest_addr = fold_convert (addr_type, addr);
4376 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4377 size_int (INTVAL (XEXP (slot, 1)))));
4378 dest = build_va_arg_indirect_ref (dest_addr);
4380 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4381 gimplify_and_add (t, pre_p);
/* Consume the register slots: bump gp_offset / fp_offset past the
   registers just used.  */
4387 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4388 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4389 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4390 gimplify_and_add (t, pre_p);
4394 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4395 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4396 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4397 gimplify_and_add (t, pre_p);
4400 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4401 gimplify_and_add (t, pre_p);
4403 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4404 append_to_statement_list (t, pre_p);
4407 /* ... otherwise out of the overflow area. */
4409 /* Care for on-stack alignment if needed. */
4410 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4411 || integer_zerop (TYPE_SIZE (type)))
/* Round overflow_arg_area up to the argument's alignment:
   ovf = (ovf + align - 1) & -align.  */
4415 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4416 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4417 build_int_cst (TREE_TYPE (ovf), align - 1));
4418 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4419 build_int_cst (TREE_TYPE (t), -align));
4421 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4423 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4424 gimplify_and_add (t2, pre_p);
/* Advance overflow_arg_area past the argument (word-rounded size).  */
4426 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4427 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4428 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4429 gimplify_and_add (t, pre_p);
4433 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4434 append_to_statement_list (t, pre_p);
4437 ptrtype = build_pointer_type (type);
4438 addr = fold_convert (ptrtype, addr);
/* For pass-by-reference arguments an extra dereference is emitted
   (guarded by indirect_p in the elided original line).  */
4441 addr = build_va_arg_indirect_ref (addr);
4442 return build_va_arg_indirect_ref (addr);
4445 /* Return nonzero if OPNUM's MEM should be matched
4446 in movabs* patterns. */
/* Check whether operand OPNUM of INSN is a MEM suitable for the movabs*
   patterns: strip a wrapping PARALLEL and any SUBREGs, then accept the
   memory unless it is volatile and volatile operands are disallowed.  */
4449 ix86_check_movabs (rtx insn, int opnum)
4453 set = PATTERN (insn);
4454 if (GET_CODE (set) == PARALLEL)
4455 set = XVECEXP (set, 0, 0);
4456 gcc_assert (GET_CODE (set) == SET);
4457 mem = XEXP (set, opnum);
4458 while (GET_CODE (mem) == SUBREG)
4459 mem = SUBREG_REG (mem);
4460 gcc_assert (GET_CODE (mem) == MEM);
/* volatile_ok is the global flag permitting volatile memory operands.  */
4461 return (volatile_ok || !MEM_VOLATILE_P (mem));
4464 /* Initialize the table of extra 80387 mathematical constants. */
/* Populate ext_80387_constants_table with the five constants the x87 can
   load with a single instruction (fldlg2, fldln2, fldl2e, fldl2t, fldpi),
   each parsed from a decimal string and rounded to XFmode.  Sets the
   ext_80387_constants_init flag when done.  */
4467 init_ext_80387_constants (void)
4469 static const char * cst[5] =
4471 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4472 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4473 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4474 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4475 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4479 for (i = 0; i < 5; i++)
4481 real_from_string (&ext_80387_constants_table[i], cst[i]);
4482 /* Ensure each constant is rounded to XFmode precision. */
4483 real_convert (&ext_80387_constants_table[i],
4484 XFmode, &ext_80387_constants_table[i]);
4487 ext_80387_constants_init = 1;
4490 /* Return true if the constant is something that can be loaded with
4491 a special instruction. */
/* Classify X as an x87-loadable constant.  Recognizes 0.0 and 1.0 for any
   float mode, and for XFmode also the five table constants (when tuning
   permits).  NOTE(review): the return statements for each case fall on
   lines elided from this extract; in upstream GCC the result is 0 for
   "not special" and a positive index otherwise.  */
4494 standard_80387_constant_p (rtx x)
4496 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4499 if (x == CONST0_RTX (GET_MODE (x)))
4501 if (x == CONST1_RTX (GET_MODE (x)))
4504 /* For XFmode constants, try to find a special 80387 instruction when
4505 optimizing for size or on those CPUs that benefit from them. */
4506 if (GET_MODE (x) == XFmode
4507 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
/* Lazily build the constant table on first use.  */
4512 if (! ext_80387_constants_init)
4513 init_ext_80387_constants ();
4515 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4516 for (i = 0; i < 5; i++)
4517 if (real_identical (&r, &ext_80387_constants_table[i]))
4524 /* Return the opcode of the special instruction to be used to load
/* Map the classification from standard_80387_constant_p (X) to the
   corresponding x87 load mnemonic.  NOTE(review): the switch cases are
   elided from this extract.  */
4528 standard_80387_constant_opcode (rtx x)
4530 switch (standard_80387_constant_p (x))
4551 /* Return the CONST_DOUBLE representing the 80387 constant that is
4552 loaded by the specified special instruction. The argument IDX
4553 matches the return value from standard_80387_constant_p. */
/* Return the CONST_DOUBLE for table entry IDX (as classified by
   standard_80387_constant_p), initializing the table lazily.
   NOTE(review): the idx-to-i mapping lines are elided here.  */
4556 standard_80387_constant_rtx (int idx)
4560 if (! ext_80387_constants_init)
4561 init_ext_80387_constants ();
4577 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4581 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
/* Return 1 if X is an FP constant loadable into an SSE register without
   a memory reference — i.e. integer const0_rtx or the mode's zero.  */
4584 standard_sse_constant_p (rtx x)
4586 if (x == const0_rtx)
4588 return (x == CONST0_RTX (GET_MODE (x)));
4591 /* Returns 1 if OP contains a symbol reference */
/* Return 1 if OP contains a SYMBOL_REF or LABEL_REF anywhere in its RTL,
   searching sub-expressions ('e') and vectors ('E') recursively.  */
4594 symbolic_reference_mentioned_p (rtx op)
4599 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4602 fmt = GET_RTX_FORMAT (GET_CODE (op));
4603 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4609 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4610 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4614 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4621 /* Return 1 if it is appropriate to emit `ret' instructions in the
4622 body of a function. Do this only if the epilogue is simple, needing a
4623 couple of insns. Prior to reloading, we can't tell how many registers
4624 must be saved, so return 0 then. Return 0 if there is no frame
4625 marker to de-allocate. */
/* Return nonzero when a simple `ret' can be emitted: only after reload,
   with no frame pointer, no stack allocation and no saved registers.
   Also reject pops >= 32768 since `ret N' takes a 16-bit immediate.  */
4628 ix86_can_use_return_insn_p (void)
4630 struct ix86_frame frame;
4632 if (! reload_completed || frame_pointer_needed)
4635 /* Don't allow more than 32 pop, since that's all we can do
4636 with one instruction. */
4637 if (current_function_pops_args
4638 && current_function_args_size >= 32768)
4641 ix86_compute_frame_layout (&frame);
4642 return frame.to_allocate == 0 && frame.nregs == 0;
4645 /* Value should be nonzero if functions must have frame pointers.
4646 Zero means the frame pointer need not be set up (and parms may
4647 be accessed via the stack pointer) in functions that seem suitable. */
/* Decide whether the current function must set up a frame pointer:
   when previous frames are accessed, when the subtarget demands it,
   when -momit-leaf-frame-pointer applies but the function isn't a leaf
   (or calls a TLS descriptor), or when profiling is enabled.  */
4650 ix86_frame_pointer_required (void)
4652 /* If we accessed previous frames, then the generated code expects
4653 to be able to access the saved ebp value in our frame. */
4654 if (cfun->machine->accesses_prev_frame)
4657 /* Several x86 os'es need a frame pointer for other reasons,
4658 usually pertaining to setjmp. */
4659 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4662 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4663 the frame pointer by default. Turn it back on now if we've not
4664 got a leaf function. */
4665 if (TARGET_OMIT_LEAF_FRAME_POINTER
4666 && (!current_function_is_leaf
4667 || ix86_current_function_calls_tls_descriptor))
4670 if (current_function_profile)
4676 /* Record that the current function accesses previous call frames. */
/* Record that the current function accesses previous call frames, which
   forces ix86_frame_pointer_required to keep the frame pointer.  */
4679 ix86_setup_frame_addresses (void)
4681 cfun->machine->accesses_prev_frame = 1;
4684 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4685 # define USE_HIDDEN_LINKONCE 1
4687 # define USE_HIDDEN_LINKONCE 0
4690 static int pic_labels_used;
4692 /* Fills in the label name that should be used for a pc thunk for
4693 the given register. */
/* Fill NAME (32 bytes) with the pc-thunk label for REGNO: a global
   "__i686.get_pc_thunk.<reg>" when hidden linkonce sections are usable,
   otherwise a file-local "LPR<regno>" internal label.  */
4696 get_pc_thunk_name (char name[32], unsigned int regno)
4698 if (USE_HIDDEN_LINKONCE)
4699 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4701 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4705 /* This function generates code for -fpic that loads %ebx with
4706 the return address of the caller and then returns. */
/* End-of-file hook: emit one get-pc thunk body per register recorded in
   the pic_labels_used bitmask (mov (%esp),%reg; ret), choosing Darwin
   coalesced, hidden-linkonce, or plain text sections, and finally mark
   the object as not needing an executable stack where required.  */
4709 ix86_file_end (void)
4714 for (regno = 0; regno < 8; ++regno)
/* Skip registers whose thunk was never requested by output_set_got.  */
4718 if (! ((pic_labels_used >> regno) & 1))
4721 get_pc_thunk_name (name, regno);
/* Darwin: weak, private-extern definition in the coalesced section.  */
4726 switch_to_section (darwin_sections[text_coal_section]);
4727 fputs ("\t.weak_definition\t", asm_out_file);
4728 assemble_name (asm_out_file, name);
4729 fputs ("\n\t.private_extern\t", asm_out_file);
4730 assemble_name (asm_out_file, name);
4731 fputs ("\n", asm_out_file);
4732 ASM_OUTPUT_LABEL (asm_out_file, name);
4736 if (USE_HIDDEN_LINKONCE)
/* ELF: one-only hidden function in its own unique section so the
   linker merges duplicate thunks across translation units.  */
4740 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4742 TREE_PUBLIC (decl) = 1;
4743 TREE_STATIC (decl) = 1;
4744 DECL_ONE_ONLY (decl) = 1;
4746 (*targetm.asm_out.unique_section) (decl, 0);
4747 switch_to_section (get_named_section (decl, NULL, 0));
4749 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4750 fputs ("\t.hidden\t", asm_out_file);
4751 assemble_name (asm_out_file, name);
4752 fputc ('\n', asm_out_file);
4753 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4757 switch_to_section (text_section);
4758 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (at the top of the stack after
   the call) into the target register, then return.  */
4761 xops[0] = gen_rtx_REG (SImode, regno);
4762 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4763 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4764 output_asm_insn ("ret", xops);
4767 if (NEED_INDICATE_EXEC_STACK)
4768 file_end_indicate_exec_stack ();
4771 /* Emit code for the SET_GOT patterns. */
/* Emit the assembly for a SET_GOT pattern: load the PC into DEST (via an
   inline call/pop sequence or a shared get-pc thunk, depending on
   TARGET_DEEP_BRANCH_PREDICTION) and then add the GOT displacement.
   NOTE(review): several guard/else lines are elided in this extract.  */
4774 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4779 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4781 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4783 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ())
4786 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
/* call to the next instruction pushes the PC, which is then popped.  */
4788 output_asm_insn ("call\t%a2", xops);
4791 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4792 is what will be referenced by the Mach-O PIC subsystem. */
4794 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4797 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4798 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4801 output_asm_insn ("pop{l}\t%0", xops);
/* Thunk variant: call __i686.get_pc_thunk.<reg>, recording the register
   so ix86_file_end later emits the thunk body.  */
4806 get_pc_thunk_name (name, REGNO (dest));
4807 pic_labels_used |= 1 << REGNO (dest);
4809 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4810 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4811 output_asm_insn ("call\t%X2", xops);
4812 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4813 is what will be referenced by the Mach-O PIC subsystem. */
4816 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4818 targetm.asm_out.internal_label (asm_out_file, "L",
4819 CODE_LABEL_NUMBER (label));
4826 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4827 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4829 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4834 /* Generate an "push" pattern for input ARG. */
/* Body of gen_push: build (set (mem (pre_dec sp)) arg), i.e. an RTL push
   of ARG onto the stack.  NOTE(review): the function header line is
   elided from this extract.  */
4839 return gen_rtx_SET (VOIDmode,
4841 gen_rtx_PRE_DEC (Pmode,
4842 stack_pointer_rtx)),
4846 /* Return >= 0 if there is an unused call-clobbered register available
4847 for the entire function. */
/* Return a call-clobbered register (eax/ecx/edx, scanned 2..0) that is
   unused for the whole function and can hold the PIC pointer instead of
   ebx; only in leaf, non-profiled, non-TLS-descriptor functions.
   Returns INVALID_REGNUM when none qualifies.  */
4850 ix86_select_alt_pic_regnum (void)
4852 if (current_function_is_leaf && !current_function_profile
4853 && !ix86_current_function_calls_tls_descriptor)
4856 for (i = 2; i >= 0; --i)
4857 if (!regs_ever_live[i])
4861 return INVALID_REGNUM;
4864 /* Return 1 if we need to save REGNO. */
/* Return 1 if REGNO must be saved in the prologue.  Handles the PIC
   register (unless an alternate PIC register is available), the
   eh_return data registers when MAYBE_EH_RETURN, the forced-alignment
   argument pointer, and finally the generic used/call-saved test.  */
4866 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4868 if (pic_offset_table_rtx
4869 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4870 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4871 || current_function_profile
4872 || current_function_calls_eh_return
4873 || current_function_uses_const_pool))
/* If a scratch register can carry the PIC pointer, ebx need not be
   saved for that purpose.  */
4875 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4880 if (current_function_calls_eh_return && maybe_eh_return)
4885 unsigned test = EH_RETURN_DATA_REGNO (i);
4886 if (test == INVALID_REGNUM)
4893 if (cfun->machine->force_align_arg_pointer
4894 && regno == REGNO (cfun->machine->force_align_arg_pointer))
/* Default rule: live, call-saved, not fixed, and not the hard frame
   pointer when one is in use.  */
4897 return (regs_ever_live[regno]
4898 && !call_used_regs[regno]
4899 && !fixed_regs[regno]
4900 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4903 /* Return number of registers to be saved on the stack. */
/* Count how many hard registers ix86_save_reg says must be saved on the
   stack (including possible eh_return data registers).  */
4906 ix86_nsaved_regs (void)
4911 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4912 if (ix86_save_reg (regno, true))
4917 /* Return the offset between two registers, one to be eliminated, and the other
4918 its replacement, at the start of a routine. */
/* Return the constant offset between eliminable register FROM and its
   replacement TO at function entry, derived from the computed frame
   layout.  Only arg/frame pointer -> hard frame/stack pointer pairs are
   valid; anything else trips the asserts.  */
4921 ix86_initial_elimination_offset (int from, int to)
4923 struct ix86_frame frame;
4924 ix86_compute_frame_layout (&frame);
4926 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4927 return frame.hard_frame_pointer_offset;
4928 else if (from == FRAME_POINTER_REGNUM
4929 && to == HARD_FRAME_POINTER_REGNUM)
4930 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4933 gcc_assert (to == STACK_POINTER_REGNUM);
4935 if (from == ARG_POINTER_REGNUM)
4936 return frame.stack_pointer_offset;
4938 gcc_assert (from == FRAME_POINTER_REGNUM);
4939 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4943 /* Fill structure ix86_frame about frame of currently computed function. */
/* Compute the stack-frame layout of the current function into *FRAME:
   saved-register count, padding for alignment, varargs save area,
   outgoing-argument area, red-zone usage, and the offsets of the frame
   and stack pointers.  Also decides whether registers are saved with
   push or mov.  NOTE(review): some guard lines (e.g. the TARGET_64BIT
   and debug-print conditions) are elided from this extract.  */
4946 ix86_compute_frame_layout (struct ix86_frame *frame)
4948 HOST_WIDE_INT total_size;
4949 unsigned int stack_alignment_needed;
4950 HOST_WIDE_INT offset;
4951 unsigned int preferred_alignment;
4952 HOST_WIDE_INT size = get_frame_size ();
4954 frame->nregs = ix86_nsaved_regs ();
4957 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4958 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4960 /* During reload iteration the amount of registers saved can change.
4961 Recompute the value as needed. Do not recompute when amount of registers
4962 didn't change as reload does multiple calls to the function and does not
4963 expect the decision to change within single iteration. */
4965 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4967 int count = frame->nregs;
4969 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4970 /* The fast prologue uses move instead of push to save registers. This
4971 is significantly longer, but also executes faster as modern hardware
4972 can execute the moves in parallel, but can't do that for push/pop.
4974 Be careful about choosing what prologue to emit: When function takes
4975 many instructions to execute we may use slow version as well as in
4976 case function is known to be outside hot spot (this is known with
4977 feedback only). Weight the size of function by number of registers
4978 to save as it is cheap to use one or two push instructions but very
4979 slow to use many of them. */
4981 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4982 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4983 || (flag_branch_probabilities
4984 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4985 cfun->machine->use_fast_prologue_epilogue = false;
4987 cfun->machine->use_fast_prologue_epilogue
4988 = !expensive_function_p (count);
4990 if (TARGET_PROLOGUE_USING_MOVE
4991 && cfun->machine->use_fast_prologue_epilogue)
4992 frame->save_regs_using_mov = true;
4994 frame->save_regs_using_mov = false;
4997 /* Skip return address and saved base pointer. */
4998 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5000 frame->hard_frame_pointer_offset = offset;
5002 /* Do some sanity checking of stack_alignment_needed and
5003 preferred_alignment, since i386 port is the only using those features
5004 that may break easily. */
5006 gcc_assert (!size || stack_alignment_needed);
5007 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5008 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5009 gcc_assert (stack_alignment_needed
5010 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5012 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5013 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5015 /* Register save area */
5016 offset += frame->nregs * UNITS_PER_WORD;
/* Varargs register save area (x86-64 only, guard line elided).  */
5019 if (ix86_save_varrargs_registers)
5021 offset += X86_64_VARARGS_SIZE;
5022 frame->va_arg_size = X86_64_VARARGS_SIZE;
5025 frame->va_arg_size = 0;
5027 /* Align start of frame for local function. */
5028 frame->padding1 = ((offset + stack_alignment_needed - 1)
5029 & -stack_alignment_needed) - offset;
5031 offset += frame->padding1;
5033 /* Frame pointer points here. */
5034 frame->frame_pointer_offset = offset;
5038 /* Add outgoing arguments area. Can be skipped if we eliminated
5039 all the function calls as dead code.
5040 Skipping is however impossible when function calls alloca. Alloca
5041 expander assumes that last current_function_outgoing_args_size
5042 of stack frame are unused. */
5043 if (ACCUMULATE_OUTGOING_ARGS
5044 && (!current_function_is_leaf || current_function_calls_alloca
5045 || ix86_current_function_calls_tls_descriptor))
5047 offset += current_function_outgoing_args_size;
5048 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5051 frame->outgoing_arguments_size = 0;
5053 /* Align stack boundary. Only needed if we're calling another function
5055 if (!current_function_is_leaf || current_function_calls_alloca
5056 || ix86_current_function_calls_tls_descriptor)
5057 frame->padding2 = ((offset + preferred_alignment - 1)
5058 & -preferred_alignment) - offset;
5060 frame->padding2 = 0;
5062 offset += frame->padding2;
5064 /* We've reached end of stack frame. */
5065 frame->stack_pointer_offset = offset;
5067 /* Size prologue needs to allocate. */
5068 frame->to_allocate =
5069 (size + frame->padding1 + frame->padding2
5070 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Tiny frames: plain pushes beat moves; huge 64-bit frames can't use
   32-bit displacements for the mov form.  */
5072 if ((!frame->to_allocate && frame->nregs <= 1)
5073 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5074 frame->save_regs_using_mov = false;
/* Red zone: leaf functions may use up to RED_ZONE_SIZE bytes below the
   stack pointer without adjusting it.  */
5076 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5077 && current_function_is_leaf
5078 && !ix86_current_function_calls_tls_descriptor)
5080 frame->red_zone_size = frame->to_allocate;
5081 if (frame->save_regs_using_mov)
5082 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5083 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5084 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5087 frame->red_zone_size = 0;
5088 frame->to_allocate -= frame->red_zone_size;
5089 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (enabling condition elided).  */
5091 fprintf (stderr, "nregs: %i\n", frame->nregs);
5092 fprintf (stderr, "size: %i\n", size);
5093 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5094 fprintf (stderr, "padding1: %i\n", frame->padding1);
5095 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5096 fprintf (stderr, "padding2: %i\n", frame->padding2);
5097 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5098 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5099 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5100 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5101 frame->hard_frame_pointer_offset);
5102 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5106 /* Emit code to save registers in the prologue. */
/* Emit push insns (highest regno first) for every register that must be
   saved, marking each insn frame-related for unwind info.  */
5109 ix86_emit_save_regs (void)
5114 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5115 if (ix86_save_reg (regno, true))
5117 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5118 RTX_FRAME_RELATED_P (insn) = 1;
5122 /* Emit code to save registers using MOV insns. First register
5123 is restored from POINTER + OFFSET. */
/* Save the to-be-saved registers with mov insns into consecutive word
   slots starting at POINTER + OFFSET (lowest regno first), marking each
   store frame-related for unwind info.  */
5125 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5130 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5131 if (ix86_save_reg (regno, true))
5133 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5135 gen_rtx_REG (Pmode, regno));
5136 RTX_FRAME_RELATED_P (insn) = 1;
5137 offset += UNITS_PER_WORD;
5141 /* Expand prologue or epilogue stack adjustment.
5142 The pattern exist to put a dependency on all ebp-based memory accesses.
5143 STYLE should be negative if instructions should be marked as frame related,
5144 zero if %r11 register is live and cannot be freely used and positive
/* Emit a prologue/epilogue stack adjustment DEST = SRC + OFFSET.  Uses
   the SImode pattern on 32-bit, the rex64 pattern when OFFSET fits an
   x86-64 immediate, and otherwise materializes OFFSET in r11 first.
   STYLE < 0 marks the insns frame-related; STYLE == 0 means r11 is live
   and must not be clobbered (guard lines elided in this extract).  */
5148 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5153 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5154 else if (x86_64_immediate_operand (offset, DImode))
5155 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5159 /* r11 is used by indirect sibcall return as well, set before the
5160 epilogue and used after the epilogue. ATM indirect sibcall
5161 shouldn't be used together with huge frame sizes in one
5162 function because of the frame_size check in sibcall.c. */
5164 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5165 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5167 RTX_FRAME_RELATED_P (insn) = 1;
5168 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5172 RTX_FRAME_RELATED_P (insn) = 1;
5175 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* TARGET_INTERNAL_ARG_POINTER hook.  For main() (when the target forces
   the preferred stack boundary there) use a pseudo copied from %ecx as a
   forced-alignment argument pointer; otherwise return the usual virtual
   incoming-args register.  */
5178 ix86_internal_arg_pointer (void)
5180 if (FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5181 && DECL_NAME (current_function_decl)
5182 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5183 && DECL_FILE_SCOPE_P (current_function_decl))
/* Hard register 2 is %ecx; ix86_expand_prologue initializes it.  */
5185 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5186 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5189 return virtual_incoming_args_rtx;
5192 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5193 This is called from dwarf2out.c to emit call frame instructions
5194 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
/* TARGET_DWARF_HANDLE_FRAME_UNSPEC hook: translate the UNSPEC_REG_SAVE
   and UNSPEC_DEF_CFA markers emitted by the stack-realignment prologue
   into dwarf2out register-save / CFA-definition calls at LABEL.  */
5196 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5198 rtx unspec = SET_SRC (pattern);
5199 gcc_assert (GET_CODE (unspec) == UNSPEC);
5203 case UNSPEC_REG_SAVE:
5204 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5205 SET_DEST (pattern));
5207 case UNSPEC_DEF_CFA:
5208 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5209 INTVAL (XVECEXP (unspec, 0, 0)));
5216 /* Expand the prologue into a bunch of separate insns. */
/* Expand the function prologue into RTL: optional stack realignment for
   main(), frame-pointer setup, register saves (push or mov form), stack
   allocation (direct subtraction or the stack-probe worker on Windows),
   PIC register initialization, and a scheduling barrier for profiling.
   NOTE(review): several guard lines (e.g. the allocate == 0 and
   TARGET_STACK_PROBE branches) are elided in this extract.  */
5219 ix86_expand_prologue (void)
5223 struct ix86_frame frame;
5224 HOST_WIDE_INT allocate;
5226 ix86_compute_frame_layout (&frame);
5228 if (cfun->machine->force_align_arg_pointer)
5232 /* Grab the argument pointer. */
5233 x = plus_constant (stack_pointer_rtx, 4);
5234 y = cfun->machine->force_align_arg_pointer;
5235 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5236 RTX_FRAME_RELATED_P (insn) = 1;
5238 /* The unwind info consists of two parts: install the fafp as the cfa,
5239 and record the fafp as the "save register" of the stack pointer.
5240 The later is there in order that the unwinder can see where it
5241 should restore the stack pointer across the and insn. */
5242 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5243 x = gen_rtx_SET (VOIDmode, y, x);
5244 RTX_FRAME_RELATED_P (x) = 1;
5245 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5247 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5248 RTX_FRAME_RELATED_P (y) = 1;
5249 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5250 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5251 REG_NOTES (insn) = x;
5253 /* Align the stack. */
5254 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5257 /* And here we cheat like madmen with the unwind info. We force the
5258 cfa register back to sp+4, which is exactly what it was at the
5259 start of the function. Re-pushing the return address results in
5260 the return at the same spot relative to the cfa, and thus is
5261 correct wrt the unwind info. */
5262 x = cfun->machine->force_align_arg_pointer;
5263 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5264 insn = emit_insn (gen_push (x));
5265 RTX_FRAME_RELATED_P (insn) = 1;
5268 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5269 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5270 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5271 REG_NOTES (insn) = x;
5274 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5275 slower on all targets. Also sdb doesn't like it. */
5277 if (frame_pointer_needed)
/* push %ebp; mov %esp, %ebp — the classic frame setup.  */
5279 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5280 RTX_FRAME_RELATED_P (insn) = 1;
5282 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5283 RTX_FRAME_RELATED_P (insn) = 1;
5286 allocate = frame.to_allocate;
5288 if (!frame.save_regs_using_mov)
5289 ix86_emit_save_regs ();
5291 allocate += frame.nregs * UNITS_PER_WORD;
5293 /* When using red zone we may start register saving before allocating
5294 the stack frame saving one cycle of the prologue. */
5295 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5296 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5297 : stack_pointer_rtx,
5298 -frame.nregs * UNITS_PER_WORD);
/* Small allocation (or no stack probing): plain sp adjustment.  */
5302 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5303 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5304 GEN_INT (-allocate), -1);
5307 /* Only valid for Win32. */
5308 rtx eax = gen_rtx_REG (SImode, 0);
5309 bool eax_live = ix86_eax_live_at_start_p ();
5312 gcc_assert (!TARGET_64BIT);
/* Preserve a live incoming eax around the probe call.  */
5316 emit_insn (gen_push (eax));
5320 emit_move_insn (eax, GEN_INT (allocate));
5322 insn = emit_insn (gen_allocate_stack_worker (eax));
5323 RTX_FRAME_RELATED_P (insn) = 1;
/* Record the net sp adjustment for the unwinder, since the worker
   call hides it from the normal frame-related machinery.  */
5324 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5325 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5326 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5327 t, REG_NOTES (insn));
5331 if (frame_pointer_needed)
5332 t = plus_constant (hard_frame_pointer_rtx,
5335 - frame.nregs * UNITS_PER_WORD);
5337 t = plus_constant (stack_pointer_rtx, allocate);
5338 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
/* Non-red-zone mov-form saves happen after the stack is allocated.  */
5342 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5344 if (!frame_pointer_needed || !frame.to_allocate)
5345 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5347 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5348 -frame.nregs * UNITS_PER_WORD);
5351 pic_reg_used = false;
5352 if (pic_offset_table_rtx
5353 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5354 || current_function_profile))
5356 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5358 if (alt_pic_reg_used != INVALID_REGNUM)
5359 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5361 pic_reg_used = true;
5367 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5369 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5371 /* Even with accurate pre-reload life analysis, we can wind up
5372 deleting all references to the pic register after reload.
5373 Consider if cross-jumping unifies two sides of a branch
5374 controlled by a comparison vs the only read from a global.
5375 In which case, allow the set_got to be deleted, though we're
5376 too late to do anything about the ebx save in the prologue. */
5377 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5380 /* Prevent function calls from be scheduled before the call to mcount.
5381 In the pic_reg_used case, make sure that the got load isn't deleted. */
5382 if (current_function_profile)
5383 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5386 /* Emit code to restore saved registers using MOV insns. First register
5387 is restored from POINTER + OFFSET. */
/* NOTE(review): this listing omits intermediate source lines (the return
   type, braces, and the declaration of `regno' are not visible here), so
   only comments are added; the visible code text is left untouched.
   MAYBE_EH_RETURN is forwarded to ix86_save_reg to decide whether the
   eh_return scratch registers count as saved.  */
5389 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5390 int maybe_eh_return)
5393 rtx base_address = gen_rtx_MEM (Pmode, pointer);
/* Walk every hard register; each one ix86_save_reg reports as saved is
   reloaded from consecutive word slots starting at POINTER + OFFSET.  */
5395 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5396 if (ix86_save_reg (regno, maybe_eh_return))
5398 /* Ensure that adjust_address won't be forced to produce pointer
5399 out of range allowed by x86-64 instruction set. */
/* When OFFSET does not fit in a signed 32-bit displacement, materialize
   POINTER + OFFSET in R11 and address relative to that instead.  */
5400 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5404 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5405 emit_move_insn (r11, GEN_INT (offset));
5406 emit_insn (gen_adddi3 (r11, r11, pointer));
5407 base_address = gen_rtx_MEM (Pmode, r11);
5410 emit_move_insn (gen_rtx_REG (Pmode, regno),
5411 adjust_address (base_address, Pmode, offset));
/* Advance to the next saved-register slot.  */
5412 offset += UNITS_PER_WORD;
5416 /* Restore function stack, frame, and registers. */
/* NOTE(review): gap-ridden listing — braces, some `else' arms and local
   declarations (e.g. `regno') are not visible; comments only.
   STYLE appears to select the epilogue flavor (style == 2 is the
   eh_return path, per the comparisons below); sibcall handling is
   referenced at the end — TODO confirm the exact STYLE encoding against
   the callers, which are outside this view.  */
5419 ix86_expand_epilogue (int style)
/* sp is "valid" (usable for addressing the save area) when there is no
   frame pointer or the function never moves sp after the prologue.  */
5422 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5423 struct ix86_frame frame;
5424 HOST_WIDE_INT offset;
5426 ix86_compute_frame_layout (&frame);
5428 /* Calculate start of saved registers relative to ebp. Special care
5429 must be taken for the normal return case of a function using
5430 eh_return: the eax and edx registers are marked as saved, but not
5431 restored along this path. */
5432 offset = frame.nregs;
5433 if (current_function_calls_eh_return && style != 2)
5435 offset *= -UNITS_PER_WORD;
5437 /* If we're only restoring one register and sp is not valid then
5438 using a move instruction to restore the register since it's
5439 less work than reloading sp and popping the register.
5441 The default code result in stack adjustment using add/lea instruction,
5442 while this code results in LEAVE instruction (or discrete equivalent),
5443 so it is profitable in some other cases as well. Especially when there
5444 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5445 and there is exactly one register to pop. This heuristic may need some
5446 tuning in future. */
/* First strategy: restore registers with MOVs (and leave/equivalent),
   chosen by the heuristic documented above or forced for eh_return.  */
5447 if ((!sp_valid && frame.nregs <= 1)
5448 || (TARGET_EPILOGUE_USING_MOVE
5449 && cfun->machine->use_fast_prologue_epilogue
5450 && (frame.nregs > 1 || frame.to_allocate))
5451 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5452 || (frame_pointer_needed && TARGET_USE_LEAVE
5453 && cfun->machine->use_fast_prologue_epilogue
5454 && frame.nregs == 1)
5455 || current_function_calls_eh_return)
5457 /* Restore registers. We can use ebp or esp to address the memory
5458 locations. If both are available, default to ebp, since offsets
5459 are known to be small. Only exception is esp pointing directly to the
5460 end of block of saved registers, where we may simplify addressing
5463 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5464 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5465 frame.to_allocate, style == 2)
5467 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5468 offset, style == 2);
5470 /* eh_return epilogues need %ecx added to the stack pointer. */
5473 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5475 if (frame_pointer_needed)
/* With a frame pointer: fold the stack adjustment into the frame-pointer
   restore, then pop the saved ebp via the adjusted address.  */
5477 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5478 tmp = plus_constant (tmp, UNITS_PER_WORD);
5479 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5481 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5482 emit_move_insn (hard_frame_pointer_rtx, tmp);
5484 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
/* Without a frame pointer: add the adjustment plus the whole save area
   size directly to the stack pointer.  */
5489 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5490 tmp = plus_constant (tmp, (frame.to_allocate
5491 + frame.nregs * UNITS_PER_WORD));
5492 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5495 else if (!frame_pointer_needed)
5496 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5497 GEN_INT (frame.to_allocate
5498 + frame.nregs * UNITS_PER_WORD),
5500 /* If not an i386, mov & pop is faster than "leave". */
5501 else if (TARGET_USE_LEAVE || optimize_size
5502 || !cfun->machine->use_fast_prologue_epilogue)
5503 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5506 pro_epilogue_adjust_stack (stack_pointer_rtx,
5507 hard_frame_pointer_rtx,
5510 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5512 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5517 /* First step is to deallocate the stack frame so that we can
5518 pop the registers. */
/* Second strategy: deallocate the frame, then POP each saved register.  */
5521 gcc_assert (frame_pointer_needed);
5522 pro_epilogue_adjust_stack (stack_pointer_rtx,
5523 hard_frame_pointer_rtx,
5524 GEN_INT (offset), style);
5526 else if (frame.to_allocate)
5527 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5528 GEN_INT (frame.to_allocate), style);
5530 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5531 if (ix86_save_reg (regno, false))
5534 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5536 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5538 if (frame_pointer_needed)
5540 /* Leave results in shorter dependency chains on CPUs that are
5541 able to grok it fast. */
5542 if (TARGET_USE_LEAVE)
5543 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5544 else if (TARGET_64BIT)
5545 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5547 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Undo the extra alignment adjustment made by the prologue when the
   argument pointer was force-aligned.  */
5551 if (cfun->machine->force_align_arg_pointer)
5553 emit_insn (gen_addsi3 (stack_pointer_rtx,
5554 cfun->machine->force_align_arg_pointer,
5558 /* Sibcall epilogues don't want a return instruction. */
/* Emit the return, honoring callee-pop ("stdcall"-style) conventions.  */
5562 if (current_function_pops_args && current_function_args_size)
5564 rtx popc = GEN_INT (current_function_pops_args);
5566 /* i386 can only pop 64K bytes. If asked to pop more, pop
5567 return address, do explicit add, and jump indirectly to the
5570 if (current_function_pops_args >= 65536)
5572 rtx ecx = gen_rtx_REG (SImode, 2);
5574 /* There is no "pascal" calling convention in 64bit ABI. */
5575 gcc_assert (!TARGET_64BIT);
5577 emit_insn (gen_popsi1 (ecx));
5578 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5579 emit_jump_insn (gen_return_indirect_internal (ecx));
5582 emit_jump_insn (gen_return_pop_internal (popc));
5585 emit_jump_insn (gen_return_internal ());
5588 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook.  The prologue may have renumbered
   pic_offset_table_rtx to an alternate register (see the
   ix86_select_alt_pic_regnum use in the prologue above); restore the
   canonical PIC register number so the next function starts clean.  */
5591 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5592 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5594 if (pic_offset_table_rtx)
5595 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5598 /* Extract the parts of an RTL expression that is a valid memory address
5599 for an instruction. Return 0 if the structure of the address is
5600 grossly off. Return -1 if the address contains ASHIFT, so it is not
5601 strictly valid, but still used for computing length of lea instruction. */
/* NOTE(review): gap-ridden listing — the early-return statements, several
   case labels, the `addends' array declaration and the final writeback
   into *OUT are not visible here; comments only.  Splits ADDR into
   base + index*scale + disp (+ optional segment override) in *OUT.  */
5604 ix86_decompose_address (rtx addr, struct ix86_address *out)
5606 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5607 rtx base_reg, index_reg;
5608 HOST_WIDE_INT scale = 1;
5609 rtx scale_rtx = NULL_RTX;
5611 enum ix86_address_seg seg = SEG_DEFAULT;
/* Simplest form: a bare register (or subreg of one) is just a base.  */
5613 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
/* A PLUS tree is flattened into its addends, then each addend is
   classified by the switch below.  */
5615 else if (GET_CODE (addr) == PLUS)
5625 addends[n++] = XEXP (op, 1);
5628 while (GET_CODE (op) == PLUS);
5633 for (i = n; i >= 0; --i)
5636 switch (GET_CODE (op))
5641 index = XEXP (op, 0);
5642 scale_rtx = XEXP (op, 1);
/* A thread-pointer UNSPEC addend becomes a %fs/%gs segment override
   when direct TLS segment references are enabled.  */
5646 if (XINT (op, 1) == UNSPEC_TP
5647 && TARGET_TLS_DIRECT_SEG_REFS
5648 && seg == SEG_DEFAULT)
5649 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5678 else if (GET_CODE (addr) == MULT)
5680 index = XEXP (addr, 0); /* index*scale */
5681 scale_rtx = XEXP (addr, 1);
5683 else if (GET_CODE (addr) == ASHIFT)
5687 /* We're called for lea too, which implements ashift on occasion. */
5688 index = XEXP (addr, 0);
5689 tmp = XEXP (addr, 1);
5690 if (GET_CODE (tmp) != CONST_INT)
5692 scale = INTVAL (tmp);
/* Shift counts 0..3 correspond to the encodable scales 1,2,4,8.  */
5693 if ((unsigned HOST_WIDE_INT) scale > 3)
5699 disp = addr; /* displacement */
5701 /* Extract the integral value of scale. */
5704 if (GET_CODE (scale_rtx) != CONST_INT)
5706 scale = INTVAL (scale_rtx);
5709 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5710 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5712 /* Allow arg pointer and stack pointer as index if there is not scaling. */
/* %esp cannot be an index in the SIB encoding, so swap it into the base
   position when the scale is 1.  */
5713 if (base_reg && index_reg && scale == 1
5714 && (index_reg == arg_pointer_rtx
5715 || index_reg == frame_pointer_rtx
5716 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5719 tmp = base, base = index, index = tmp;
5720 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5723 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5724 if ((base_reg == hard_frame_pointer_rtx
5725 || base_reg == frame_pointer_rtx
5726 || base_reg == arg_pointer_rtx) && !disp)
5729 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5730 Avoid this by transforming to [%esi+0]. */
5731 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5732 && base_reg && !index_reg && !disp
5734 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5737 /* Special case: encode reg+reg instead of reg*2. */
5738 if (!base && index && scale && scale == 2)
5739 base = index, base_reg = index_reg, scale = 1;
5741 /* Special case: scaling cannot be encoded without base or displacement. */
5742 if (!base && !disp && index && scale != 1)
5754 /* Return cost of the memory address x.
5755 For i386, it is better to use a complex address than let gcc copy
5756 the address into a reg and make a new pseudo. But not if the address
5757 requires to two regs - that would mean more pseudos with longer
/* NOTE(review): the `cost' accumulator declaration, its increments and
   the final return are in lines omitted from this listing; comments
   only.  Decomposes X and charges for displacement, segment override,
   hard registers and K6-unfriendly encodings.  */
5760 ix86_address_cost (rtx x)
5762 struct ix86_address parts;
5764 int ok = ix86_decompose_address (x, &parts);
/* Strip subregs so the REG_P / REGNO tests below see the inner regs.  */
5768 if (parts.base && GET_CODE (parts.base) == SUBREG)
5769 parts.base = SUBREG_REG (parts.base);
5770 if (parts.index && GET_CODE (parts.index) == SUBREG)
5771 parts.index = SUBREG_REG (parts.index);
5773 /* More complex memory references are better. */
5774 if (parts.disp && parts.disp != const0_rtx)
5776 if (parts.seg != SEG_DEFAULT)
5779 /* Attempt to minimize number of registers in the address. */
/* Pseudo registers (regno >= FIRST_PSEUDO_REGISTER) are preferred over
   hard registers here; two distinct hard regs cost the most.  */
5781 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5783 && (!REG_P (parts.index)
5784 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5788 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5790 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5791 && parts.base != parts.index)
5794 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5795 since it's predecode logic can't detect the length of instructions
5796 and it degenerates to vector decoded. Increase cost of such
5797 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5798 to split such addresses or even refuse such addresses at all.
5800 Following addressing modes are affected:
5805 The first and last case may be avoidable by explicitly coding the zero in
5806 memory address, but I don't have AMD-K6 machine handy to check this
5810 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5811 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5812 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5818 /* If X is a machine specific address (i.e. a symbol or label being
5819 referenced as a displacement from the GOT implemented using an
5820 UNSPEC), then return the base term. Otherwise return X. */
/* NOTE(review): the TARGET_64BIT guard, the `term' initialization and
   the return statements fall in omitted lines; comments only.  */
5823 ix86_find_base_term (rtx x)
5829 if (GET_CODE (x) != CONST)
/* Peel an optional constant offset off the CONST body.  */
5832 if (GET_CODE (term) == PLUS
5833 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5834 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5835 term = XEXP (term, 0);
/* Only a GOTPCREL unspec wraps a base term we can extract.  */
5836 if (GET_CODE (term) != UNSPEC
5837 || XINT (term, 1) != UNSPEC_GOTPCREL)
5840 term = XVECEXP (term, 0, 0);
5842 if (GET_CODE (term) != SYMBOL_REF
5843 && GET_CODE (term) != LABEL_REF)
/* Fallback path: let delegitimization try to recover the symbol.  */
5849 term = ix86_delegitimize_address (x);
5851 if (GET_CODE (term) != SYMBOL_REF
5852 && GET_CODE (term) != LABEL_REF)
5858 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5859 this is used for to form addresses to local data when -fPIC is in
/* Recognizes the Mach-O "<pic base>" MINUS pattern; the true-return is
   in a line omitted from this listing.  */
5863 darwin_local_data_pic (rtx disp)
5865 if (GET_CODE (disp) == MINUS)
5867 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5868 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5869 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5871 const char *sym_name = XSTR (XEXP (disp, 1), 0);
/* The Darwin PIC base symbol is spelled literally "<pic base>".  */
5872 if (! strcmp (sym_name, "<pic base>"))
5880 /* Determine if a given RTX is a valid constant. We already know this
5881 satisfies CONSTANT_P. */
/* NOTE(review): the case labels of the outer switch and the true/false
   returns are in omitted lines; comments only.  Rejects TLS symbols and
   unspecs other than the small set handled below.  */
5884 legitimate_constant_p (rtx x)
5886 switch (GET_CODE (x))
/* CONST: drill into the body, peeling an integer offset if present.  */
5891 if (GET_CODE (x) == PLUS)
5893 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5898 if (TARGET_MACHO && darwin_local_data_pic (x))
5901 /* Only some unspecs are valid as "constants". */
5902 if (GET_CODE (x) == UNSPEC)
5903 switch (XINT (x, 1))
5906 return TARGET_64BIT;
/* TPOFF-style unspecs are constant only for matching TLS models.  */
5909 x = XVECEXP (x, 0, 0);
5910 return (GET_CODE (x) == SYMBOL_REF
5911 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5913 x = XVECEXP (x, 0, 0);
5914 return (GET_CODE (x) == SYMBOL_REF
5915 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
5920 /* We must have drilled down to a symbol. */
5921 if (GET_CODE (x) == LABEL_REF)
5923 if (GET_CODE (x) != SYMBOL_REF)
5928 /* TLS symbols are never valid. */
5929 if (SYMBOL_REF_TLS_MODEL (x))
5937 /* Otherwise we handle everything else in the move patterns. */
5941 /* Determine if it's legal to put X into the constant pool. This
5942 is not possible for the address of thread-local symbols, which
5943 is checked above. */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: simply the negation of the
   legitimate-constant predicate defined above.  */
5946 ix86_cannot_force_const_mem (rtx x)
5948 return !legitimate_constant_p (x);
5951 /* Determine if a given RTX is a valid constant address. */
/* Uses strict checking (third argument 1) so only hard-register-free
   constant forms qualify.  */
5954 constant_address_p (rtx x)
5956 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5959 /* Nonzero if the constant value X is a legitimate general operand
5960 when generating PIC code. It is given that flag_pic is on and
5961 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): the switch case labels and the default return are in
   omitted lines; comments only.  */
5964 legitimate_pic_operand_p (rtx x)
5968 switch (GET_CODE (x))
/* CONST: look through an optional integer offset to the inner term.  */
5971 inner = XEXP (x, 0);
5972 if (GET_CODE (inner) == PLUS
5973 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
5974 inner = XEXP (inner, 0);
5976 /* Only some unspecs are valid as "constants". */
5977 if (GET_CODE (inner) == UNSPEC)
5978 switch (XINT (inner, 1))
5981 return TARGET_64BIT;
5983 x = XVECEXP (inner, 0, 0);
5984 return (GET_CODE (x) == SYMBOL_REF
5985 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
/* Symbols and labels fall back to the displacement predicate below.  */
5993 return legitimate_pic_address_disp_p (x);
6000 /* Determine if a given CONST RTX is a valid memory displacement
/* NOTE(review): gap-ridden listing — the TARGET_64BIT guard around the
   first switch, several case labels, and many return statements are in
   omitted lines; comments only.  Validates DISP as a PIC-mode memory
   displacement: direct symbol refs in 64-bit mode, else GOT/GOTOFF/TLS
   unspec forms.  */
6004 legitimate_pic_address_disp_p (rtx disp)
6008 /* In 64bit mode we can allow direct addresses of symbols and labels
6009 when they are not dynamic symbols. */
6012 rtx op0 = disp, op1;
6014 switch (GET_CODE (disp))
/* CONST: must wrap symbol+offset with the offset within +/-16MB so the
   whole address still fits a signed 32-bit displacement.  */
6020 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6022 op0 = XEXP (XEXP (disp, 0), 0);
6023 op1 = XEXP (XEXP (disp, 0), 1);
6024 if (GET_CODE (op1) != CONST_INT
6025 || INTVAL (op1) >= 16*1024*1024
6026 || INTVAL (op1) < -16*1024*1024)
6028 if (GET_CODE (op0) == LABEL_REF)
6030 if (GET_CODE (op0) != SYMBOL_REF)
6035 /* TLS references should always be enclosed in UNSPEC. */
6036 if (SYMBOL_REF_TLS_MODEL (op0))
/* Direct addressing is fine only for local, near symbols.  */
6038 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6046 if (GET_CODE (disp) != CONST)
6048 disp = XEXP (disp, 0);
/* 64-bit path: only bare GOTPCREL/GOTOFF unspecs are acceptable.  */
6052 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6053 of GOT tables. We should not need these anyway. */
6054 if (GET_CODE (disp) != UNSPEC
6055 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6056 && XINT (disp, 1) != UNSPEC_GOTOFF))
6059 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6060 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: peel symbol+offset, then classify the unspec.  */
6066 if (GET_CODE (disp) == PLUS)
6068 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6070 disp = XEXP (disp, 0);
6074 if (TARGET_MACHO && darwin_local_data_pic (disp))
6077 if (GET_CODE (disp) != UNSPEC)
6080 switch (XINT (disp, 1))
6085 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6087 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6088 While ABI specify also 32bit relocation but we don't produce it in
6089 small PIC model at all. */
6090 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6091 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6093 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6095 case UNSPEC_GOTTPOFF:
6096 case UNSPEC_GOTNTPOFF:
6097 case UNSPEC_INDNTPOFF:
/* Each TLS unspec is valid only when the symbol's TLS model matches.  */
6100 disp = XVECEXP (disp, 0, 0);
6101 return (GET_CODE (disp) == SYMBOL_REF
6102 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6104 disp = XVECEXP (disp, 0, 0);
6105 return (GET_CODE (disp) == SYMBOL_REF
6106 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6108 disp = XVECEXP (disp, 0, 0);
6109 return (GET_CODE (disp) == SYMBOL_REF
6110 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6116 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6117 memory address for an instruction. The MODE argument is the machine mode
6118 for the MEM expression that wants to use this address.
6120 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6121 convert common non-canonical forms to canonical form so that they will
/* NOTE(review): gap-ridden listing — the `reg' declarations, the
   success/failure labels with their returns, and several braces fall in
   omitted lines; comments only.  Validates base, index, scale and
   displacement in turn, recording a failure reason for debugging.  */
6125 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6127 struct ix86_address parts;
6128 rtx base, index, disp;
6129 HOST_WIDE_INT scale;
6130 const char *reason = NULL;
6131 rtx reason_rtx = NULL_RTX;
6133 if (TARGET_DEBUG_ADDR)
6136 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6137 GET_MODE_NAME (mode), strict);
6141 if (ix86_decompose_address (addr, &parts) <= 0)
6143 reason = "decomposition failed";
6148 index = parts.index;
6150 scale = parts.scale;
6152 /* Validate base register.
6154 Don't allow SUBREG's that span more than a word here. It can lead to spill
6155 failures when the base is one word out of a two word structure, which is
6156 represented internally as a DImode int. */
6165 else if (GET_CODE (base) == SUBREG
6166 && REG_P (SUBREG_REG (base))
6167 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6169 reg = SUBREG_REG (base);
6172 reason = "base is not a register";
6176 if (GET_MODE (base) != Pmode)
6178 reason = "base is not in Pmode";
/* Strict checking requires a hard register of the right class;
   non-strict additionally accepts pseudos.  */
6182 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6183 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6185 reason = "base is not valid";
6190 /* Validate index register.
6192 Don't allow SUBREG's that span more than a word here -- same as above. */
6201 else if (GET_CODE (index) == SUBREG
6202 && REG_P (SUBREG_REG (index))
6203 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6205 reg = SUBREG_REG (index);
6208 reason = "index is not a register";
6212 if (GET_MODE (index) != Pmode)
6214 reason = "index is not in Pmode";
6218 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6219 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6221 reason = "index is not valid";
6226 /* Validate scale factor. */
6229 reason_rtx = GEN_INT (scale);
6232 reason = "scale without index";
/* Only the SIB-encodable scales 1, 2, 4, 8 are acceptable.  */
6236 if (scale != 2 && scale != 4 && scale != 8)
6238 reason = "scale is not a valid multiplier";
6243 /* Validate displacement. */
6248 if (GET_CODE (disp) == CONST
6249 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6250 switch (XINT (XEXP (disp, 0), 1))
6252 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6253 used. While ABI specify also 32bit relocations, we don't produce
6254 them at all and use IP relative instead. */
6257 gcc_assert (flag_pic);
6259 goto is_legitimate_pic;
6260 reason = "64bit address unspec";
6263 case UNSPEC_GOTPCREL:
6264 gcc_assert (flag_pic);
6265 goto is_legitimate_pic;
6267 case UNSPEC_GOTTPOFF:
6268 case UNSPEC_GOTNTPOFF:
6269 case UNSPEC_INDNTPOFF:
6275 reason = "invalid address unspec";
6279 else if (flag_pic && (SYMBOLIC_CONST (disp)
6281 && !machopic_operand_p (disp)
/* PIC displacement checking (is_legitimate_pic target).  */
6286 if (TARGET_64BIT && (index || base))
6288 /* foo@dtpoff(%rX) is ok. */
6289 if (GET_CODE (disp) != CONST
6290 || GET_CODE (XEXP (disp, 0)) != PLUS
6291 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6292 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6293 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6294 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6296 reason = "non-constant pic memory reference";
6300 else if (! legitimate_pic_address_disp_p (disp))
6302 reason = "displacement is an invalid pic construct";
6306 /* This code used to verify that a symbolic pic displacement
6307 includes the pic_offset_table_rtx register.
6309 While this is good idea, unfortunately these constructs may
6310 be created by "adds using lea" optimization for incorrect
6319 This code is nonsensical, but results in addressing
6320 GOT table with pic_offset_table_rtx base. We can't
6321 just refuse it easily, since it gets matched by
6322 "addsi3" pattern, that later gets split to lea in the
6323 case output register differs from input. While this
6324 can be handled by separate addsi pattern for this case
6325 that never results in lea, this seems to be easier and
6326 correct fix for crash to disable this test. */
6328 else if (GET_CODE (disp) != LABEL_REF
6329 && GET_CODE (disp) != CONST_INT
6330 && (GET_CODE (disp) != CONST
6331 || !legitimate_constant_p (disp))
6332 && (GET_CODE (disp) != SYMBOL_REF
6333 || !legitimate_constant_p (disp)))
6335 reason = "displacement is not constant";
6338 else if (TARGET_64BIT
6339 && !x86_64_immediate_operand (disp, VOIDmode))
6341 reason = "displacement is out of range";
6346 /* Everything looks valid. */
6347 if (TARGET_DEBUG_ADDR)
6348 fprintf (stderr, "Success.\n");
/* Failure path: report the recorded reason when debugging addresses.  */
6352 if (TARGET_DEBUG_ADDR)
6354 fprintf (stderr, "Error: %s\n", reason);
6355 debug_rtx (reason_rtx);
6360 /* Return a unique alias set for the GOT. */
6362 static HOST_WIDE_INT
6363 ix86_GOT_alias_set (void)
/* Lazily allocated: -1 means "not yet created"; presumably a guard
   testing that sentinel sits in an omitted line before the call.  */
6365 static HOST_WIDE_INT set = -1;
6367 set = new_alias_set ();
6371 /* Return a legitimate reference for ORIG (an address) using the
6372 register REG. If REG is 0, a new pseudo is generated.
6374 There are two types of references that must be handled:
6376 1. Global data references must load the address from the GOT, via
6377 the PIC reg. An insn is emitted to do this load, and the reg is
6380 2. Static data references, constant pool addresses, and code labels
6381 compute the address as an offset from the GOT, whose base is in
6382 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6383 differentiate them from global data objects. The returned
6384 address is the PIC reg + an unspec constant.
6386 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6387 reg also appears in the address. */
/* NOTE(review): gap-ridden listing — the `addr'/`new' declarations,
   several braces and returns are in omitted lines; comments only.  */
6390 legitimize_pic_address (rtx orig, rtx reg)
6398 reg = gen_reg_rtx (Pmode);
6399 /* Use the generic Mach-O PIC machinery. */
6400 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* 64-bit: addresses that are already valid PIC displacements pass
   through; medium/large-model local symbols get a GOTOFF computation.  */
6403 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6405 else if (TARGET_64BIT
6406 && ix86_cmodel != CM_SMALL_PIC
6407 && local_symbolic_operand (addr, Pmode))
6410 /* This symbol may be referenced via a displacement from the PIC
6411 base address (@GOTOFF). */
/* During reload, using the PIC register must be recorded by hand.  */
6413 if (reload_in_progress)
6414 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6415 if (GET_CODE (addr) == CONST)
6416 addr = XEXP (addr, 0);
6417 if (GET_CODE (addr) == PLUS)
6419 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6420 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6423 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6424 new = gen_rtx_CONST (Pmode, new);
6426 tmpreg = gen_reg_rtx (Pmode);
6429 emit_move_insn (tmpreg, new);
6433 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6434 tmpreg, 1, OPTAB_DIRECT);
6437 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
/* 32-bit local symbols: pic_reg + @GOTOFF, no memory load needed.  */
6439 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6441 /* This symbol may be referenced via a displacement from the PIC
6442 base address (@GOTOFF). */
6444 if (reload_in_progress)
6445 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6446 if (GET_CODE (addr) == CONST)
6447 addr = XEXP (addr, 0);
6448 if (GET_CODE (addr) == PLUS)
6450 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6451 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6454 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6455 new = gen_rtx_CONST (Pmode, new);
6456 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6460 emit_move_insn (reg, new);
/* Global symbols: load the address out of the GOT.  The 64-bit form is
   RIP-relative @GOTPCREL; the 32-bit form is pic_reg + @GOT.  */
6464 else if (GET_CODE (addr) == SYMBOL_REF
6468 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6469 new = gen_rtx_CONST (Pmode, new);
6470 new = gen_const_mem (Pmode, new);
6471 set_mem_alias_set (new, ix86_GOT_alias_set ());
6474 reg = gen_reg_rtx (Pmode);
6475 /* Use directly gen_movsi, otherwise the address is loaded
6476 into register for CSE. We don't want to CSE this addresses,
6477 instead we CSE addresses from the GOT table, so skip this. */
6478 emit_insn (gen_movsi (reg, new));
6483 /* This symbol must be referenced via a load from the
6484 Global Offset Table (@GOT). */
6486 if (reload_in_progress)
6487 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6488 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6489 new = gen_rtx_CONST (Pmode, new);
6490 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6491 new = gen_const_mem (Pmode, new);
6492 set_mem_alias_set (new, ix86_GOT_alias_set ());
6495 reg = gen_reg_rtx (Pmode);
6496 emit_move_insn (reg, new);
/* Remaining shapes: wide integers, already-processed CONST/UNSPEC
   wrappers, and PLUS expressions legitimized recursively.  */
6502 if (GET_CODE (addr) == CONST_INT
6503 && !x86_64_immediate_operand (addr, VOIDmode))
6507 emit_move_insn (reg, addr);
6511 new = force_reg (Pmode, addr);
6513 else if (GET_CODE (addr) == CONST)
6515 addr = XEXP (addr, 0);
6517 /* We must match stuff we generate before. Assume the only
6518 unspecs that can get here are ours. Not that we could do
6519 anything with them anyway.... */
6520 if (GET_CODE (addr) == UNSPEC
6521 || (GET_CODE (addr) == PLUS
6522 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6524 gcc_assert (GET_CODE (addr) == PLUS);
6526 if (GET_CODE (addr) == PLUS)
6528 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6530 /* Check first to see if this is a constant offset from a @GOTOFF
6531 symbol reference. */
6532 if (local_symbolic_operand (op0, Pmode)
6533 && GET_CODE (op1) == CONST_INT)
6537 if (reload_in_progress)
6538 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6539 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6541 new = gen_rtx_PLUS (Pmode, new, op1);
6542 new = gen_rtx_CONST (Pmode, new);
6543 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6547 emit_move_insn (reg, new);
/* 64-bit: offsets outside +/-16MB cannot ride on the symbol; force
   the parts into registers and add them explicitly.  */
6553 if (INTVAL (op1) < -16*1024*1024
6554 || INTVAL (op1) >= 16*1024*1024)
6556 if (!x86_64_immediate_operand (op1, Pmode))
6557 op1 = force_reg (Pmode, op1);
6558 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both operands, then recombine, folding a
   constant term back into the base when possible.  */
6564 base = legitimize_pic_address (XEXP (addr, 0), reg);
6565 new = legitimize_pic_address (XEXP (addr, 1),
6566 base == reg ? NULL_RTX : reg);
6568 if (GET_CODE (new) == CONST_INT)
6569 new = plus_constant (base, INTVAL (new));
6572 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6574 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6575 new = XEXP (new, 1);
6577 new = gen_rtx_PLUS (Pmode, base, new);
6585 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* NOTE(review): the declarations of `tp'/`reg'/`insn' and both return
   statements are in omitted lines; comments only.  The thread pointer
   is represented as an UNSPEC_TP so later passes can recognize it.  */
6588 get_thread_pointer (int to_reg)
6592 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6596 reg = gen_reg_rtx (Pmode);
6597 insn = gen_rtx_SET (VOIDmode, reg, tp);
6598 insn = emit_insn (insn);
6603 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6604 false if we expect this to be used for a memory address and true if
6605 we expect to load the address into a register.  */
/* NOTE(review): gap-ridden listing — the switch statement opener, some
   braces, `return dest' statements and a default case are in omitted
   lines; comments only.  Implements the four ELF TLS access models
   (global-dynamic, local-dynamic, initial-exec, local-exec), with
   TARGET_GNU2_TLS selecting the GNU2 (TLSDESC-style) variants.  */
6608 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6610 rtx dest, base, off, pic, tp;
6615 case TLS_MODEL_GLOBAL_DYNAMIC:
6616 dest = gen_reg_rtx (Pmode);
6617 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
/* Classic 64-bit GD: a __tls_get_addr call returning in %rax, wrapped
   as a libcall block so it can be CSEd.  */
6619 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6621 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6624 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6625 insns = get_insns ();
6628 emit_libcall_block (insns, dest, rax, x);
6630 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6631 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6633 emit_insn (gen_tls_global_dynamic_32 (dest, x));
/* GNU2 TLS yields an offset from the thread pointer; add TP here.  */
6635 if (TARGET_GNU2_TLS)
6637 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6639 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6643 case TLS_MODEL_LOCAL_DYNAMIC:
/* LD: one call obtains the module base; each symbol then adds its
   @DTPOFF displacement to that shared base.  */
6644 base = gen_reg_rtx (Pmode);
6645 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6647 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6649 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6652 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6653 insns = get_insns ();
6656 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6657 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6658 emit_libcall_block (insns, base, rax, note);
6660 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6661 emit_insn (gen_tls_local_dynamic_base_64 (base));
6663 emit_insn (gen_tls_local_dynamic_base_32 (base));
6665 if (TARGET_GNU2_TLS)
6667 rtx x = ix86_tls_module_base ();
6669 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, base));
6671 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6674 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6675 off = gen_rtx_CONST (Pmode, off);
6677 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6680 case TLS_MODEL_INITIAL_EXEC:
/* IE: load the symbol's TP-offset from the GOT; the unspec TYPE picks
   the relocation flavor depending on bitness, PIC and GNU-TLS.  */
6684 type = UNSPEC_GOTNTPOFF;
6688 if (reload_in_progress)
6689 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6690 pic = pic_offset_table_rtx;
6691 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6693 else if (!TARGET_ANY_GNU_TLS)
6695 pic = gen_reg_rtx (Pmode);
6696 emit_insn (gen_set_got (pic));
6697 type = UNSPEC_GOTTPOFF;
6702 type = UNSPEC_INDNTPOFF;
6705 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6706 off = gen_rtx_CONST (Pmode, off);
6708 off = gen_rtx_PLUS (Pmode, pic, off);
6709 off = gen_const_mem (Pmode, off);
6710 set_mem_alias_set (off, ix86_GOT_alias_set ());
/* GNU TLS and 64-bit add the offset to TP; the legacy 32-bit scheme
   subtracts it instead.  */
6712 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6714 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6715 off = force_reg (Pmode, off);
6716 return gen_rtx_PLUS (Pmode, base, off);
6720 base = get_thread_pointer (true);
6721 dest = gen_reg_rtx (Pmode);
6722 emit_insn (gen_subsi3 (dest, base, off));
6726 case TLS_MODEL_LOCAL_EXEC:
/* LE: the offset is a link-time constant (@NTPOFF or @TPOFF).  */
6727 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6728 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6729 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6730 off = gen_rtx_CONST (Pmode, off);
6732 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6734 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6735 return gen_rtx_PLUS (Pmode, base, off);
6739 base = get_thread_pointer (true);
6740 dest = gen_reg_rtx (Pmode);
6741 emit_insn (gen_subsi3 (dest, base, off));
6752 /* Try machine-dependent ways of modifying an illegitimate address
6753 to be legitimate. If we find one, return the new, valid address.
6754 This macro is used in only one place: `memory_address' in explow.c.
6756 OLDX is the address as it was before break_out_memory_refs was called.
6757 In some cases it is useful to look at this to decide what needs to be done.
6759 MODE and WIN are passed so that this macro can use
6760 GO_IF_LEGITIMATE_ADDRESS.
6762 It is always safe for this macro to do nothing. It exists to recognize
6763 opportunities to optimize the output.
6765 For the 80386, we handle X+REG by loading X into a register R and
6766 using R+REG. R will go in a general reg and indexing will be used.
6767 However, if REG is a broken-out memory address or multiplication,
6768 nothing needs to be done because REG can certainly go in a general reg.
6770 When -fpic is used, special handling is needed for symbolic references.
6771 See comments by legitimize_pic_address in i386.c for details. */
6774 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6779 if (TARGET_DEBUG_ADDR)
6781 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6782 GET_MODE_NAME (mode));
/* A bare SYMBOL_REF carrying a TLS model gets the TLS-specific lowering.  */
6786 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6788 return legitimize_tls_address (x, log, false);
/* Likewise (const (plus (tls-symbol) (const_int))): lower the symbol and
   re-add the constant offset afterwards.  */
6789 if (GET_CODE (x) == CONST
6790 && GET_CODE (XEXP (x, 0)) == PLUS
6791 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6792 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6794 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6795 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* Symbolic constants under -fpic go through the PIC legitimizer.  */
6798 if (flag_pic && SYMBOLIC_CONST (x))
6799 return legitimize_pic_address (x, 0);
6801 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6802 if (GET_CODE (x) == ASHIFT
6803 && GET_CODE (XEXP (x, 1)) == CONST_INT
6804 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6807 log = INTVAL (XEXP (x, 1));
6808 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6809 GEN_INT (1 << log));
6812 if (GET_CODE (x) == PLUS)
6814 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6816 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6817 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6818 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6821 log = INTVAL (XEXP (XEXP (x, 0), 1));
6822 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6823 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6824 GEN_INT (1 << log));
/* Same canonicalization for a shift appearing as the second addend.  */
6827 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6828 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6829 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
6832 log = INTVAL (XEXP (XEXP (x, 1), 1));
6833 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6834 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6835 GEN_INT (1 << log));
6838 /* Put multiply first if it isn't already. */
6839 if (GET_CODE (XEXP (x, 1)) == MULT)
6841 rtx tmp = XEXP (x, 0);
6842 XEXP (x, 0) = XEXP (x, 1);
6847 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6848 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6849 created by virtual register instantiation, register elimination, and
6850 similar optimizations. */
6851 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6854 x = gen_rtx_PLUS (Pmode,
6855 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6856 XEXP (XEXP (x, 1), 0)),
6857 XEXP (XEXP (x, 1), 1));
6861 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6862 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6863 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6864 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6865 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6866 && CONSTANT_P (XEXP (x, 1)))
6869 rtx other = NULL_RTX;
/* Pick whichever of the two addends is the CONST_INT; the remaining
   term becomes OTHER and is folded in via plus_constant below.  */
6871 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6873 constant = XEXP (x, 1);
6874 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6876 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6878 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6879 other = XEXP (x, 1);
6887 x = gen_rtx_PLUS (Pmode,
6888 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6889 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6890 plus_constant (other, INTVAL (constant)));
/* If the canonicalizations above already produced a valid address,
   return early rather than forcing operands into registers.  */
6894 if (changed && legitimate_address_p (mode, x, FALSE))
6897 if (GET_CODE (XEXP (x, 0)) == MULT)
6900 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6903 if (GET_CODE (XEXP (x, 1)) == MULT)
6906 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6910 && GET_CODE (XEXP (x, 1)) == REG
6911 && GET_CODE (XEXP (x, 0)) == REG)
6914 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6917 x = legitimize_pic_address (x, 0);
6920 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one side of the PLUS into a fresh register.  */
6923 if (GET_CODE (XEXP (x, 0)) == REG)
6925 rtx temp = gen_reg_rtx (Pmode);
6926 rtx val = force_operand (XEXP (x, 1), temp);
6928 emit_move_insn (temp, val);
6934 else if (GET_CODE (XEXP (x, 1)) == REG)
6936 rtx temp = gen_reg_rtx (Pmode);
6937 rtx val = force_operand (XEXP (x, 0), temp);
6939 emit_move_insn (temp, val);
6949 /* Print an integer constant expression in assembler syntax. Addition
6950 and subtraction are the only arithmetic that may appear in these
6951 expressions. FILE is the stdio stream to write to, X is the rtx, and
6952 CODE is the operand print code from the output string. */
/* Recursively print the PIC/TLS-annotated constant expression X to FILE.
   CODE is the operand print code ('P' requests a @PLT suffix on non-local
   symbols).  Only symbols, labels, integers and PLUS/MINUS combinations
   thereof are handled; anything else is an operand-lossage error.  */
6955 output_pic_addr_const (FILE *file, rtx x, int code)
6959 switch (GET_CODE (x))
6962 gcc_assert (flag_pic);
6967 output_addr_const (file, x);
/* Non-local symbols referenced with %P go through the PLT (ELF only;
   Mach-O is excluded).  */
6968 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6969 fputs ("@PLT", file);
6976 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6977 assemble_name (asm_out_file, buf);
6981 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6985 /* This used to output parentheses around the expression,
6986 but that does not work on the 386 (either ATT or BSD assembler). */
6987 output_pic_addr_const (file, XEXP (x, 0), code);
6991 if (GET_MODE (x) == VOIDmode)
6993 /* We can use %d if the number is <32 bits and positive. */
6994 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6995 fprintf (file, "0x%lx%08lx",
6996 (unsigned long) CONST_DOUBLE_HIGH (x),
6997 (unsigned long) CONST_DOUBLE_LOW (x));
6999 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7002 /* We can't handle floating point constants;
7003 PRINT_OPERAND must handle them. */
7004 output_operand_lossage ("floating constant misused");
7008 /* Some assemblers need integer constants to appear first. */
7009 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7011 output_pic_addr_const (file, XEXP (x, 0), code);
7013 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: subtrahend must be a CONST_INT; print it second.  */
7017 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7018 output_pic_addr_const (file, XEXP (x, 1), code);
7020 output_pic_addr_const (file, XEXP (x, 0), code);
/* Intel dialect brackets the difference with parens, AT&T with
   square brackets.  */
7026 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7027 output_pic_addr_const (file, XEXP (x, 0), code);
7029 output_pic_addr_const (file, XEXP (x, 1), code);
7031 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: a single wrapped operand plus a relocation suffix chosen
   by the unspec number.  */
7035 gcc_assert (XVECLEN (x, 0) == 1);
7036 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7037 switch (XINT (x, 1))
7040 fputs ("@GOT", file);
7043 fputs ("@GOTOFF", file);
7045 case UNSPEC_GOTPCREL:
7046 fputs ("@GOTPCREL(%rip)", file);
7048 case UNSPEC_GOTTPOFF:
7049 /* FIXME: This might be @TPOFF in Sun ld too. */
7050 fputs ("@GOTTPOFF", file);
7053 fputs ("@TPOFF", file);
7057 fputs ("@TPOFF", file);
7059 fputs ("@NTPOFF", file);
7062 fputs ("@DTPOFF", file);
7064 case UNSPEC_GOTNTPOFF:
7066 fputs ("@GOTTPOFF(%rip)", file);
7068 fputs ("@GOTNTPOFF", file);
7070 case UNSPEC_INDNTPOFF:
7071 fputs ("@INDNTPOFF", file);
7074 output_operand_lossage ("invalid UNSPEC as operand");
7080 output_operand_lossage ("invalid expression as operand");
7084 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7085 We need to emit DTP-relative relocations. */
/* Emit a DTP-relative (dynamic thread pointer relative) relocation for X,
   used by the DWARF output code (TARGET_ASM_OUTPUT_DWARF_DTPREL).
   Emits "<long directive> <x>@DTPOFF"; SIZE presumably selects a wider
   form in the elided branch (", 0" pads the upper half) -- confirm against
   the full source.  */
7088 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7090 fputs (ASM_LONG, file);
7091 output_addr_const (file, x);
7092 fputs ("@DTPOFF", file);
7098 fputs (", 0", file);
7105 /* In the name of slightly smaller debug output, and to cater to
7106 general assembler lossage, recognize PIC+GOTOFF and turn it back
7107 into a direct symbol reference.
7109 On Darwin, this is necessary to avoid a crash, because Darwin
7110 has a different PIC label for each routine but the DWARF debugging
7111 information is not associated with any particular routine, so it's
7112 necessary to remove references to the PIC label from RTL stored by
7113 the DWARF output code. */
/* Undo PIC legitimization: given ORIG_X (possibly a MEM of a GOT/GOTOFF
   expression), try to recover the plain symbol reference so debug output
   is smaller and Darwin's per-function PIC labels don't leak into DWARF.
   Returns the delegitimized rtx, re-adding any register and constant
   addends that surrounded the unspec.  */
7116 ix86_delegitimize_address (rtx orig_x)
7119 /* reg_addend is NULL or a multiple of some register. */
7120 rtx reg_addend = NULL_RTX;
7121 /* const_addend is NULL or a const_int. */
7122 rtx const_addend = NULL_RTX;
7123 /* This is the result, or NULL. */
7124 rtx result = NULL_RTX;
7126 if (GET_CODE (x) == MEM)
/* 64-bit form: a (const (unspec GOTPCREL)) inside a MEM unwraps
   directly to the symbol.  */
7131 if (GET_CODE (x) != CONST
7132 || GET_CODE (XEXP (x, 0)) != UNSPEC
7133 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7134 || GET_CODE (orig_x) != MEM)
7136 return XVECEXP (XEXP (x, 0), 0, 0);
7139 if (GET_CODE (x) != PLUS
7140 || GET_CODE (XEXP (x, 1)) != CONST)
7143 if (GET_CODE (XEXP (x, 0)) == REG
7144 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7145 /* %ebx + GOT/GOTOFF */
7147 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7149 /* %ebx + %reg * scale + GOT/GOTOFF */
7150 reg_addend = XEXP (x, 0)
/* Strip the PIC register from whichever side of the inner PLUS it
   occupies; what is left is the scaled-index addend.  */;
7151 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7152 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7153 reg_addend = XEXP (reg_addend, 1);
7154 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7155 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7156 reg_addend = XEXP (reg_addend, 0);
7159 if (GET_CODE (reg_addend) != REG
7160 && GET_CODE (reg_addend) != MULT
7161 && GET_CODE (reg_addend) != ASHIFT)
7167 x = XEXP (XEXP (x, 1), 0);
7168 if (GET_CODE (x) == PLUS
7169 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7171 const_addend = XEXP (x, 1);
/* GOT references appear inside a MEM, GOTOFF references outside;
   anything else is left alone.  */
7175 if (GET_CODE (x) == UNSPEC
7176 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7177 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7178 result = XVECEXP (x, 0, 0);
7180 if (TARGET_MACHO && darwin_local_data_pic (x)
7181 && GET_CODE (orig_x) != MEM)
7182 result = XEXP (x, 0);
/* Rebuild the stripped-off constant and register addends.  */
7188 result = gen_rtx_PLUS (Pmode, result, const_addend);
7190 result = gen_rtx_PLUS (Pmode, reg_addend, result);
/* Write the condition-code suffix (e.g. "a", "ae", "p") for comparison
   CODE in flags mode MODE to FILE.  REVERSE asks for the inverted
   condition; the elided FP parameter apparently selects fcmov-style
   spellings ("nbe"/"nb"/"u"...) over the integer ones -- confirm against
   the full parameter list.  */
7195 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
/* FP compares are first mapped onto an equivalent integer condition.  */
7200 if (mode == CCFPmode || mode == CCFPUmode)
7202 enum rtx_code second_code, bypass_code;
7203 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7204 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7205 code = ix86_fp_compare_code_to_integer (code);
7209 code = reverse_condition (code);
7220 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7224 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7225 Those same assemblers have the same but opposite lossage on cmov. */
7226 gcc_assert (mode == CCmode);
7227 suffix = fp ? "nbe" : "a";
7247 gcc_assert (mode == CCmode);
7269 gcc_assert (mode == CCmode);
7270 suffix = fp ? "nb" : "ae";
7273 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7277 gcc_assert (mode == CCmode);
/* Parity flag: used to detect unordered FP results.  */
7281 suffix = fp ? "u" : "p";
7284 suffix = fp ? "nu" : "np";
7289 fputs (suffix, file);
7292 /* Print the name of register X to FILE based on its machine mode and number.
7293 If CODE is 'w', pretend the mode is HImode.
7294 If CODE is 'b', pretend the mode is QImode.
7295 If CODE is 'k', pretend the mode is SImode.
7296 If CODE is 'q', pretend the mode is DImode.
7297 If CODE is 'h', pretend the reg is the 'high' byte register.
7298 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7301 print_reg (rtx x, int code, FILE *file)
/* These pseudo-registers must have been eliminated before final.  */
7303 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7304 && REGNO (x) != FRAME_POINTER_REGNUM
7305 && REGNO (x) != FLAGS_REG
7306 && REGNO (x) != FPSR_REG);
/* AT&T dialect (or an empty user-label prefix) prints a '%' sigil;
   the putc itself is on an elided line.  */
7308 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Translate the print code into an operand byte-size; MMX registers
   are always treated as word-sized here.  */
7311 if (code == 'w' || MMX_REG_P (x))
7313 else if (code == 'b')
7315 else if (code == 'k')
7317 else if (code == 'q')
7319 else if (code == 'y')
7321 else if (code == 'h')
7324 code = GET_MODE_SIZE (GET_MODE (x));
7326 /* Irritatingly, AMD extended registers use different naming convention
7327 from the normal registers. */
7328 if (REX_INT_REG_P (x))
7330 gcc_assert (TARGET_64BIT);
7334 error ("extended registers have no high halves");
/* r8..r15 with b/w/d suffix according to the requested size.  */
7337 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7340 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7343 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7346 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7349 error ("unsupported operand size for extended register");
/* 'y' code: top of the x87 stack prints as "st(0)" rather than "st".  */
7357 if (STACK_TOP_P (x))
7359 fputs ("st(0)", file);
/* 4- and 8-byte integer registers get an 'e'/'r' prefix on the
   16-bit base name ("ax" -> "eax"/"rax"); FP regs are excluded.  */
7366 if (! ANY_FP_REG_P (x))
7367 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7372 fputs (hi_reg_name[REGNO (x)], file);
7375 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7377 fputs (qi_reg_name[REGNO (x)], file);
7380 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7382 fputs (qi_high_reg_name[REGNO (x)], file);
7389 /* Locate some local-dynamic symbol still in use by this function
7390 so that we can print its name in some tls_local_dynamic_base
/* Return the name of some local-dynamic TLS symbol still referenced by
   the current function (cached in cfun->machine->some_ld_name), scanning
   the insn stream on first use via get_some_local_dynamic_name_1.  */
7394 get_some_local_dynamic_name (void)
7398 if (cfun->machine->some_ld_name)
7399 return cfun->machine->some_ld_name;
7401 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7403 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7404 return cfun->machine->some_ld_name;
/* for_each_rtx callback: record the first local-dynamic SYMBOL_REF found
   into cfun->machine->some_ld_name (return value, on an elided line,
   presumably stops the walk once found).  */
7410 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7414 if (GET_CODE (x) == SYMBOL_REF
7415 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7417 cfun->machine->some_ld_name = XSTR (x, 0);
7425 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7426 C -- print opcode suffix for set/cmov insn.
7427 c -- like C, but print reversed condition
7428 F,f -- likewise, but for floating-point.
7429 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7431 R -- print the prefix for register names.
7432 z -- print the opcode suffix for the size of the current operand.
7433 * -- print a star (in certain assembler syntax)
7434 A -- print an absolute memory reference.
7435 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7436 s -- print a shift double count, followed by the assemblers argument
7438 b -- print the QImode name of the register for the indicated operand.
7439 %b0 would print %al if operands[0] is reg 0.
7440 w -- likewise, print the HImode name of the register.
7441 k -- likewise, print the SImode name of the register.
7442 q -- likewise, print the DImode name of the register.
7443 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7444 y -- print "st(0)" instead of "st" as a register.
7445 D -- print condition for SSE cmp instruction.
7446 P -- if PIC, print an @PLT suffix.
7447 X -- don't print any sort of PIC '@' suffix for a symbol.
7448 & -- print some in-use local-dynamic symbol name.
7449 H -- print a memory address offset by 8; used for sse high-parts
/* Main operand printer for the i386 port; CODE is the %-letter documented
   in the comment block above.  Dispatches on CODE first, then on the rtx
   class of X (register, memory, FP constant, vector, immediate).  */
7453 print_operand (FILE *file, rtx x, int code)
7460 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': emit the name of an in-use local-dynamic TLS symbol.  */
7465 assemble_name (file, get_some_local_dynamic_name ());
/* 'A': absolute memory reference; syntax differs per dialect.  */
7469 switch (ASSEMBLER_DIALECT)
7476 /* Intel syntax. For absolute addresses, registers should not
7477 be surrounded by braces. */
7478 if (GET_CODE (x) != REG)
7481 PRINT_OPERAND (file, x, 0);
7491 PRINT_OPERAND (file, x, 0);
/* L/W/B/Q/S/T: explicit size suffixes, emitted only for AT&T.  */
7496 if (ASSEMBLER_DIALECT == ASM_ATT)
7501 if (ASSEMBLER_DIALECT == ASM_ATT)
7506 if (ASSEMBLER_DIALECT == ASM_ATT)
7511 if (ASSEMBLER_DIALECT == ASM_ATT)
7516 if (ASSEMBLER_DIALECT == ASM_ATT)
7521 if (ASSEMBLER_DIALECT == ASM_ATT)
7526 /* 387 opcodes don't get size suffixes if the operands are
7528 if (STACK_REG_P (x))
7531 /* Likewise if using Intel opcodes. */
7532 if (ASSEMBLER_DIALECT == ASM_INTEL)
7535 /* This is the size of op from size of operand. */
7536 switch (GET_MODE_SIZE (GET_MODE (x)))
7539 #ifdef HAVE_GAS_FILDS_FISTS
7545 if (GET_MODE (x) == SFmode)
7560 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7562 #ifdef GAS_MNEMONICS
/* 's': shift-double count; a constant count is printed inline,
   followed by the assembler's separator.  */
7588 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7590 PRINT_OPERAND (file, x, 0);
7596 /* Little bit of braindamage here. The SSE compare instructions
7597 does use completely different names for the comparisons that the
7598 fp conditional moves. */
7599 switch (GET_CODE (x))
7614 fputs ("unord", file);
7618 fputs ("neq", file);
7622 fputs ("nlt", file);
7626 fputs ("nle", file);
7629 fputs ("ord", file);
/* 'O': Sun-assembler cmov size letter (w/l/q), AT&T only.  */
7636 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7637 if (ASSEMBLER_DIALECT == ASM_ATT)
7639 switch (GET_MODE (x))
7641 case HImode: putc ('w', file); break;
7643 case SFmode: putc ('l', file); break;
7645 case DFmode: putc ('q', file); break;
7646 default: gcc_unreachable ();
/* 'C': condition suffix for set/cmov.  */
7653 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
/* 'F': same but floating-point (fcmov) spelling.  */
7656 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7657 if (ASSEMBLER_DIALECT == ASM_ATT)
7660 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7663 /* Like above, but reverse condition */
7665 /* Check to see if argument to %c is really a constant
7666 and not a condition code which needs to be reversed. */
7667 if (!COMPARISON_P (x))
7669 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7672 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7675 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7676 if (ASSEMBLER_DIALECT == ASM_ATT)
7679 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'H': address of the high 8 bytes (SSE high-parts).  */
7683 /* It doesn't actually matter what mode we use here, as we're
7684 only going to use this for printing. */
7685 x = adjust_address_nv (x, DImode, 8);
/* Branch-prediction hint prefixes, only when the prediction disagrees
   with the CPU's static forward/backward heuristic.  */
7692 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7695 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7698 int pred_val = INTVAL (XEXP (x, 0));
7700 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7701 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7703 int taken = pred_val > REG_BR_PROB_BASE / 2;
7704 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7706 /* Emit hints only in the case default branch prediction
7707 heuristics would fail. */
7708 if (taken != cputaken)
7710 /* We use 3e (DS) prefix for taken branches and
7711 2e (CS) prefix for not taken branches. */
7713 fputs ("ds ; ", file);
7715 fputs ("cs ; ", file);
7722 output_operand_lossage ("invalid operand code '%c'", code);
/* No (or already-consumed) code letter: print X by rtx class.  */
7726 if (GET_CODE (x) == REG)
7727 print_reg (x, code, file);
7729 else if (GET_CODE (x) == MEM)
7731 /* No `byte ptr' prefix for call instructions. */
7732 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7735 switch (GET_MODE_SIZE (GET_MODE (x)))
7737 case 1: size = "BYTE"; break;
7738 case 2: size = "WORD"; break;
7739 case 4: size = "DWORD"; break;
7740 case 8: size = "QWORD"; break;
7741 case 12: size = "XWORD"; break;
7742 case 16: size = "XMMWORD"; break;
7747 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7750 else if (code == 'w')
7752 else if (code == 'k')
7756 fputs (" PTR ", file);
7760 /* Avoid (%rip) for call operands. */
7761 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7762 && GET_CODE (x) != CONST_INT)
7763 output_addr_const (file, x);
7764 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7765 output_operand_lossage ("invalid constraints for operand");
/* SFmode immediates are printed as their 32-bit bit pattern.  */
7770 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7775 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7776 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7778 if (ASSEMBLER_DIALECT == ASM_ATT)
7780 fprintf (file, "0x%08lx", l);
7783 /* These float cases don't actually occur as immediate operands. */
7784 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7788 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7789 fprintf (file, "%s", dstr);
7792 else if (GET_CODE (x) == CONST_DOUBLE
7793 && GET_MODE (x) == XFmode)
7797 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7798 fprintf (file, "%s", dstr);
7803 /* We have patterns that allow zero sets of memory, for instance.
7804 In 64-bit mode, we should probably support all 8-byte vectors,
7805 since we can in fact encode that into an immediate. */
7806 if (GET_CODE (x) == CONST_VECTOR)
7808 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediates get the AT&T '$' sigil unless suppressed.  */
7814 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7816 if (ASSEMBLER_DIALECT == ASM_ATT)
7819 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7820 || GET_CODE (x) == LABEL_REF)
7822 if (ASSEMBLER_DIALECT == ASM_ATT)
7825 fputs ("OFFSET FLAT:", file);
7828 if (GET_CODE (x) == CONST_INT)
7829 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
/* PIC constants need the relocation-aware printer.  */
7831 output_pic_addr_const (file, x, code);
7833 output_addr_const (file, x);
7837 /* Print a memory operand whose address is ADDR. */
/* Print the memory address ADDR to FILE, after decomposing it into
   base + index*scale + displacement (+ optional segment override) via
   ix86_decompose_address.  Handles both AT&T and Intel dialects.  */
7840 print_operand_address (FILE *file, rtx addr)
7842 struct ix86_address parts;
7843 rtx base, index, disp;
7845 int ok = ix86_decompose_address (addr, &parts);
7850 index = parts.index;
7852 scale = parts.scale;
/* Non-default segment: print an fs:/gs: override first.  */
7860 if (USER_LABEL_PREFIX[0] == 0)
7862 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7868 if (!base && !index)
7870 /* Displacement only requires special attention. */
7872 if (GET_CODE (disp) == CONST_INT)
/* Intel dialect needs an explicit ds: on bare numeric addresses.  */
7874 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7876 if (USER_LABEL_PREFIX[0] == 0)
7878 fputs ("ds:", file);
7880 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7883 output_pic_addr_const (file, disp, 0);
7885 output_addr_const (file, disp);
7887 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7890 if (GET_CODE (disp) == CONST
7891 && GET_CODE (XEXP (disp, 0)) == PLUS
7892 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7893 disp = XEXP (XEXP (disp, 0), 0);
/* TLS symbols must not get (%rip); their relocation forms differ.  */
7894 if (GET_CODE (disp) == LABEL_REF
7895 || (GET_CODE (disp) == SYMBOL_REF
7896 && SYMBOL_REF_TLS_MODEL (disp) == 0))
7897 fputs ("(%rip)", file);
/* AT&T form: disp(base,index,scale).  */
7902 if (ASSEMBLER_DIALECT == ASM_ATT)
7907 output_pic_addr_const (file, disp, 0);
7908 else if (GET_CODE (disp) == LABEL_REF)
7909 output_asm_label (disp);
7911 output_addr_const (file, disp);
7916 print_reg (base, 0, file);
7920 print_reg (index, 0, file);
7922 fprintf (file, ",%d", scale);
/* Intel form: symbol[base+index*scale+offset].  */
7928 rtx offset = NULL_RTX;
7932 /* Pull out the offset of a symbol; print any symbol itself. */
7933 if (GET_CODE (disp) == CONST
7934 && GET_CODE (XEXP (disp, 0)) == PLUS
7935 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7937 offset = XEXP (XEXP (disp, 0), 1);
7938 disp = gen_rtx_CONST (VOIDmode,
7939 XEXP (XEXP (disp, 0), 0));
7943 output_pic_addr_const (file, disp, 0);
7944 else if (GET_CODE (disp) == LABEL_REF)
7945 output_asm_label (disp);
7946 else if (GET_CODE (disp) == CONST_INT)
7949 output_addr_const (file, disp);
7955 print_reg (base, 0, file);
7958 if (INTVAL (offset) >= 0)
7960 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7964 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7971 print_reg (index, 0, file);
7973 fprintf (file, "*%d", scale);
/* Hook for output_addr_const to print target-specific UNSPEC constants:
   the wrapped operand followed by the matching TLS relocation suffix.
   Returns false (on an elided line, presumably) for non-UNSPEC rtx so the
   generic code handles them -- confirm against the full source.  */
7981 output_addr_const_extra (FILE *file, rtx x)
7985 if (GET_CODE (x) != UNSPEC)
7988 op = XVECEXP (x, 0, 0);
7989 switch (XINT (x, 1))
7991 case UNSPEC_GOTTPOFF:
7992 output_addr_const (file, op);
7993 /* FIXME: This might be @TPOFF in Sun ld. */
7994 fputs ("@GOTTPOFF", file);
7997 output_addr_const (file, op);
7998 fputs ("@TPOFF", file);
8001 output_addr_const (file, op);
/* @TPOFF vs. @NTPOFF chosen by an elided condition (the same pair
   appears in output_pic_addr_const for UNSPEC_NTPOFF/UNSPEC_TPOFF).  */
8003 fputs ("@TPOFF", file);
8005 fputs ("@NTPOFF", file);
8008 output_addr_const (file, op);
8009 fputs ("@DTPOFF", file);
8011 case UNSPEC_GOTNTPOFF:
8012 output_addr_const (file, op);
/* 64-bit uses the RIP-relative spelling; 32-bit uses @GOTNTPOFF.  */
8014 fputs ("@GOTTPOFF(%rip)", file);
8016 fputs ("@GOTNTPOFF", file);
8018 case UNSPEC_INDNTPOFF:
8019 output_addr_const (file, op);
8020 fputs ("@INDNTPOFF", file);
8030 /* Split one or more DImode RTL references into pairs of SImode
8031 references. The RTL can be REG, offsettable MEM, integer constant, or
8032 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8033 split and "num" is its length. lo_half and hi_half are output arrays
8034 that parallel "operands". */
/* Split NUM DImode rtx in OPERANDS into SImode low/high halves, stored
   in the parallel arrays LO_HALF and HI_HALF.  MEMs are split with
   adjust_address (offsets 0 and 4) because simplify_gen_subreg refuses
   volatile MEMs; everything else goes through simplify_gen_subreg,
   defaulting VOIDmode constants to DImode.  */
8037 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8041 rtx op = operands[num];
8043 /* simplify_subreg refuse to split volatile memory addresses,
8044 but we still have to handle it. */
8045 if (GET_CODE (op) == MEM)
8047 lo_half[num] = adjust_address (op, SImode, 0);
8048 hi_half[num] = adjust_address (op, SImode, 4);
8052 lo_half[num] = simplify_gen_subreg (SImode, op,
8053 GET_MODE (op) == VOIDmode
8054 ? DImode : GET_MODE (op), 0);
8055 hi_half[num] = simplify_gen_subreg (SImode, op,
8056 GET_MODE (op) == VOIDmode
8057 ? DImode : GET_MODE (op), 4);
8061 /* Split one or more TImode RTL references into pairs of DImode
8062 references. The RTL can be REG, offsettable MEM, integer constant, or
8063 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8064 split and "num" is its length. lo_half and hi_half are output arrays
8065 that parallel "operands". */
/* TImode counterpart of split_di: split NUM TImode rtx in OPERANDS into
   DImode low/high halves (byte offsets 0 and 8) in LO_HALF/HI_HALF.  */
8068 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8072 rtx op = operands[num];
8074 /* simplify_subreg refuse to split volatile memory addresses, but we
8075 still have to handle it. */
8076 if (GET_CODE (op) == MEM)
8078 lo_half[num] = adjust_address (op, DImode, 0);
8079 hi_half[num] = adjust_address (op, DImode, 8);
8083 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8084 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8089 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8090 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8091 is the expression of the binary operation. The output may either be
8092 emitted here, or returned to the caller, like all output_* functions.
8094 There is no guarantee that the operands are the same mode, as they
8095 might be within FLOAT or FLOAT_EXTEND expressions. */
8097 #ifndef SYSV386_COMPAT
8098 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8099 wants to fix the assemblers because that causes incompatibility
8100 with gcc. No-one wants to fix gcc because that causes
8101 incompatibility with assemblers... You can use the option of
8102 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8103 #define SYSV386_COMPAT 1
/* Produce the assembler template for a binary FP operation (operands[3]
   is PLUS/MINUS/MULT/DIV).  Chooses between SSE scalar forms
   (add/sub/mul/div + ss/sd) and x87 forms (fadd/fsub/fmul/fdiv and their
   i/r/p variants), picking operand order and popping behavior from which
   operand is st(0), which dies, and which is memory.  Result is built in
   the static BUF, so the returned string is only valid until the next call.  */
8107 output_387_binary_op (rtx insn, rtx *operands)
8109 static char buf[30];
8112 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8114 #ifdef ENABLE_CHECKING
8115 /* Even if we do not want to check the inputs, this documents input
8116 constraints. Which helps in understanding the following code. */
8117 if (STACK_REG_P (operands[0])
8118 && ((REG_P (operands[1])
8119 && REGNO (operands[0]) == REGNO (operands[1])
8120 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8121 || (REG_P (operands[2])
8122 && REGNO (operands[0]) == REGNO (operands[2])
8123 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8124 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8127 gcc_assert (is_sse);
/* Pick the base mnemonic; an integer-mode source selects the fi*
   (integer-operand) x87 form.  The strcpy calls are on elided lines.  */
8130 switch (GET_CODE (operands[3]))
8133 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8134 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8142 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8143 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8151 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8152 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8160 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8161 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: append the scalar-single/scalar-double suffix + operands.  */
8175 if (GET_MODE (operands[0]) == SFmode)
8176 strcat (buf, "ss\t{%2, %0|%0, %2}");
8178 strcat (buf, "sd\t{%2, %0|%0, %2}");
8183 switch (GET_CODE (operands[3]))
/* Commutative ops: ensure the register equal to operands[0] is
   operands[1], swapping if necessary.  */
8187 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8189 rtx temp = operands[2];
8190 operands[2] = operands[1];
8194 /* know operands[0] == operands[1]. */
8196 if (GET_CODE (operands[2]) == MEM)
8202 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8204 if (STACK_TOP_P (operands[0]))
8205 /* How is it that we are storing to a dead operand[2]?
8206 Well, presumably operands[1] is dead too. We can't
8207 store the result to st(0) as st(0) gets popped on this
8208 instruction. Instead store to operands[2] (which I
8209 think has to be st(1)). st(1) will be popped later.
8210 gcc <= 2.8.1 didn't have this check and generated
8211 assembly code that the Unixware assembler rejected. */
8212 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8214 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8218 if (STACK_TOP_P (operands[0]))
8219 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8221 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): memory operand forms.  */
8226 if (GET_CODE (operands[1]) == MEM)
8232 if (GET_CODE (operands[2]) == MEM)
8238 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8241 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8242 derived assemblers, confusingly reverse the direction of
8243 the operation for fsub{r} and fdiv{r} when the
8244 destination register is not st(0). The Intel assembler
8245 doesn't have this brain damage. Read !SYSV386_COMPAT to
8246 figure out what the hardware really does. */
8247 if (STACK_TOP_P (operands[0]))
8248 p = "{p\t%0, %2|rp\t%2, %0}";
8250 p = "{rp\t%2, %0|p\t%0, %2}";
8252 if (STACK_TOP_P (operands[0]))
8253 /* As above for fmul/fadd, we can't store to st(0). */
8254 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8256 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8261 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8264 if (STACK_TOP_P (operands[0]))
8265 p = "{rp\t%0, %1|p\t%1, %0}";
8267 p = "{p\t%1, %0|rp\t%0, %1}";
8269 if (STACK_TOP_P (operands[0]))
8270 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8272 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
/* Neither source dies: non-popping forms, possibly reversed.  */
8277 if (STACK_TOP_P (operands[0]))
8279 if (STACK_TOP_P (operands[1]))
8280 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8282 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8285 else if (STACK_TOP_P (operands[1]))
8288 p = "{\t%1, %0|r\t%0, %1}";
8290 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8296 p = "{r\t%2, %0|\t%0, %2}";
8298 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8311 /* Return needed mode for entity in optimize_mode_switching pass. */
/* optimize_mode_switching hook: return the x87 control-word mode INSN
   requires.  Calls/asms force I387_CW_UNINITIALIZED (the CW must be
   reloaded afterwards); unrecognizable insns fall through (elided return);
   otherwise the mode comes from the insn's i387_cw attribute.  */
8314 ix86_mode_needed (int entity, rtx insn)
8316 enum attr_i387_cw mode;
8318 /* The mode UNINITIALIZED is used to store control word after a
8319 function call or ASM pattern. The mode ANY specify that function
8320 has no requirements on the control word and make no changes in the
8321 bits we are interested in. */
8324 || (NONJUMP_INSN_P (insn)
8325 && (asm_noperands (PATTERN (insn)) >= 0
8326 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8327 return I387_CW_UNINITIALIZED;
8329 if (recog_memoized (insn) < 0)
8332 mode = get_attr_i387_cw (insn);
/* Per-entity filtering: each returned value is gated on an elided
   ENTITY check (one entity per rounding/mask mode).  */
8337 if (mode == I387_CW_TRUNC)
8342 if (mode == I387_CW_FLOOR)
8347 if (mode == I387_CW_CEIL)
8352 if (mode == I387_CW_MASK_PM)
8363 /* Output code to initialize control word copies used by trunc?f?i and
8364 rounding patterns. CURRENT_MODE is set to current control word,
8365 while NEW_MODE is set to new control word. */
/* Emit code that stores the current x87 control word (fnstcw), derives a
   modified copy selecting the rounding/masking behavior MODE, and saves
   it to the stack slot matching that mode so fldcw can load it later.
   RC bits are CW[11:10]: 00 nearest, 01 down, 10 up, 11 truncate.  */
8368 emit_i387_cw_initialization (int mode)
8370 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8375 rtx reg = gen_reg_rtx (HImode);
8377 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8378 emit_move_insn (reg, stored_mode);
/* Slow path: explicit and/or on the HImode copy.  Used when the insv
   trick below would cause partial-register stalls or isn't profitable.  */
8380 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8385 /* round toward zero (truncate) */
8386 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8387 slot = SLOT_CW_TRUNC;
8391 /* round down toward -oo */
8392 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8393 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8394 slot = SLOT_CW_FLOOR;
8398 /* round up toward +oo */
8399 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8400 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8401 slot = SLOT_CW_CEIL;
8404 case I387_CW_MASK_PM:
8405 /* mask precision exception for nearbyint() */
8406 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8407 slot = SLOT_CW_MASK_PM;
/* Fast path: overwrite the RC field in one insert (movsi_insv_1),
   writing the 2-bit rounding code directly.  */
8419 /* round toward zero (truncate) */
8420 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8421 slot = SLOT_CW_TRUNC;
8425 /* round down toward -oo */
8426 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8427 slot = SLOT_CW_FLOOR;
8431 /* round up toward +oo */
8432 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8433 slot = SLOT_CW_CEIL;
8436 case I387_CW_MASK_PM:
8437 /* mask precision exception for nearbyint() */
8438 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8439 slot = SLOT_CW_MASK_PM;
8447 gcc_assert (slot < MAX_386_STACK_LOCALS);
8449 new_mode = assign_386_stack_local (HImode, slot);
8450 emit_move_insn (new_mode, reg);
8453 /* Output code for INSN to convert a float to a signed int. OPERANDS
8454 are the insn operands. The output may be [HSD]Imode and the input
8455 operand may be [SDX]Fmode. */
8458 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8460 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8461 int dimode_p = GET_MODE (operands[0]) == DImode;
8462 int round_mode = get_attr_i387_cw (insn);
8464 /* Jump through a hoop or two for DImode, since the hardware has no
8465 non-popping instruction. We used to do this a different way, but
8466 that was somewhat fragile and broke with post-reload splitters. */
8467 if ((dimode_p || fisttp) && !stack_top_dies)
8468 output_asm_insn ("fld\t%y1", operands);
8470 gcc_assert (STACK_TOP_P (operands[1]));
8471 gcc_assert (GET_CODE (operands[0]) == MEM);
/* fisttp (SSE3) truncates regardless of the control word, so no fldcw
   dance is emitted on that path.  */
8474 output_asm_insn ("fisttp%z0\t%0", operands);
/* Classic x87 path: switch to the truncating control word (%3), do the
   store (popping when the value dies or for DImode), then restore the
   original control word (%2).  */
8477 if (round_mode != I387_CW_ANY)
8478 output_asm_insn ("fldcw\t%3", operands);
8479 if (stack_top_dies || dimode_p)
8480 output_asm_insn ("fistp%z0\t%0", operands);
8482 output_asm_insn ("fist%z0\t%0", operands);
8483 if (round_mode != I387_CW_ANY)
8484 output_asm_insn ("fldcw\t%2", operands);
8490 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8491 should be used. UNORDERED_P is true when fucom should be used. */
8494 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8497 rtx cmp_op0, cmp_op1;
8498 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Operand layout differs between the eflags (fcomi-style) and the
   fnstsw-style patterns; pick the compare operands accordingly.  */
8502 cmp_op0 = operands[0];
8503 cmp_op1 = operands[1];
8507 cmp_op0 = operands[1];
8508 cmp_op1 = operands[2];
/* SSE compares: (u)comiss for SFmode, (u)comisd otherwise.  */
8513 if (GET_MODE (operands[0]) == SFmode)
8515 return "ucomiss\t{%1, %0|%0, %1}";
8517 return "comiss\t{%1, %0|%0, %1}";
8520 return "ucomisd\t{%1, %0|%0, %1}";
8522 return "comisd\t{%1, %0|%0, %1}";
8525 gcc_assert (STACK_TOP_P (cmp_op0));
8527 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst only tests st(0); pop it afterwards when
   the top of the stack dies.  */
8529 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8533 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8534 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
8537 return "ftst\n\tfnstsw\t%0";
8540 if (STACK_REG_P (cmp_op1)
8542 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8543 && REGNO (cmp_op1) != FIRST_STACK_REG)
8545 /* If both the top of the 387 stack dies, and the other operand
8546 is also a stack register that dies, then this must be a
8547 `fcompp' float compare */
8551 /* There is no double popping fcomi variant. Fortunately,
8552 eflags is immune from the fstp's cc clobbering. */
8554 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8556 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8557 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
8562 return "fucompp\n\tfnstsw\t%0";
8564 return "fcompp\n\tfnstsw\t%0";
/* General case: select the template from a 16-entry table indexed by a
   4-bit mask built below.  NOTE(review): several table rows are elided
   from this listing.  */
8569 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8571 static const char * const alt[16] =
8573 "fcom%z2\t%y2\n\tfnstsw\t%0",
8574 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8575 "fucom%z2\t%y2\n\tfnstsw\t%0",
8576 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8578 "ficom%z2\t%y2\n\tfnstsw\t%0",
8579 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8583 "fcomi\t{%y1, %0|%0, %y1}",
8584 "fcomip\t{%y1, %0|%0, %y1}",
8585 "fucomi\t{%y1, %0|%0, %y1}",
8586 "fucomip\t{%y1, %0|%0, %y1}",
8597 mask = eflags_p << 3;
8598 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8599 mask |= unordered_p << 1;
8600 mask |= stack_top_dies;
8602 gcc_assert (mask < 16);
/* Emit one element of a jump-table address vector: a .long (or .quad
   when the elided condition selects it) referencing local label VALUE.  */
8611 ix86_output_addr_vec_elt (FILE *file, int value)
8613 const char *directive = ASM_LONG;
/* NOTE(review): the guard selecting ASM_QUAD is elided; the assert
   below shows the ASM_LONG path is 32-bit only.  */
8617 directive = ASM_QUAD;
8619 gcc_assert (!TARGET_64BIT);
8622 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC jump table as a difference of labels
   (VALUE relative to REL), or @GOTOFF / Mach-O picbase-relative forms
   depending on target.  NOTE(review): the selecting conditionals are
   partly elided from this listing.  */
8626 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8629 fprintf (file, "%s%s%d-%s%d\n",
8630 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8631 else if (HAVE_AS_GOTOFF_IN_DATA)
8632 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8634 else if (TARGET_MACHO)
8636 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8637 machopic_output_function_base_name (file);
8638 fprintf(file, "\n");
/* Fallback: label relative to the GOT symbol.  */
8642 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8643 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8646 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8650 ix86_expand_clear (rtx dest)
8654 /* We play register width games, which are only valid after reload. */
8655 gcc_assert (reload_completed);
8657 /* Avoid HImode and its attendant prefix byte. */
8658 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8659 dest = gen_rtx_REG (SImode, REGNO (dest));
8661 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8663 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8664 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* xor clobbers the flags; hard register 17 is the flags register here,
   so wrap the set in a PARALLEL with an explicit clobber.  */
8666 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8667 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8673 /* X is an unchanging MEM. If it is a constant pool reference, return
8674 the constant pool rtx, else NULL. */
8677 maybe_get_pool_constant (rtx x)
/* Undo PIC/GOT wrapping first so a pool SYMBOL_REF is recognizable.  */
8679 x = ix86_delegitimize_address (XEXP (x, 0));
8681 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8682 return get_pool_constant (x);
/* Expand a scalar move of MODE between operands[0] and operands[1],
   legitimizing TLS references, PIC symbols, pushes and FP constants
   along the way, then emit the final SET.  */
8688 ix86_expand_move (enum machine_mode mode, rtx operands[])
8690 int strict = (reload_in_progress || reload_completed);
8692 enum tls_model model;
/* TLS symbol sources must be rewritten through the TLS access model.  */
8697 if (GET_CODE (op1) == SYMBOL_REF)
8699 model = SYMBOL_REF_TLS_MODEL (op1);
8702 op1 = legitimize_tls_address (op1, model, true);
8703 op1 = force_operand (op1, op0);
/* Likewise for (const (plus (symbol_ref tls) offset)): legitimize the
   symbol, then add the offset back.  */
8708 else if (GET_CODE (op1) == CONST
8709 && GET_CODE (XEXP (op1, 0)) == PLUS
8710 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8712 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8715 rtx addend = XEXP (XEXP (op1, 0), 1);
8716 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8717 op1 = force_operand (op1, NULL);
8718 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8719 op0, 1, OPTAB_DIRECT);
/* PIC: route symbolic addresses through the machopic/legitimize
   machinery.  NOTE(review): the TARGET_MACHO #if structure is partly
   elided from this listing.  */
8725 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8730 rtx temp = ((reload_in_progress
8731 || ((op0 && GET_CODE (op0) == REG)
8733 ? op0 : gen_reg_rtx (Pmode));
8734 op1 = machopic_indirect_data_reference (op1, temp);
8735 op1 = machopic_legitimize_pic_address (op1, mode,
8736 temp == op1 ? 0 : temp);
8738 else if (MACHOPIC_INDIRECT)
8739 op1 = machopic_indirect_data_reference (op1, 0);
8743 if (GET_CODE (op0) == MEM)
8744 op1 = force_reg (Pmode, op1);
8746 op1 = legitimize_address (op1, op1, Pmode);
8747 #endif /* TARGET_MACHO */
/* No mem->mem moves except for genuine pushes.  */
8751 if (GET_CODE (op0) == MEM
8752 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8753 || !push_operand (op0, mode))
8754 && GET_CODE (op1) == MEM)
8755 op1 = force_reg (mode, op1);
8757 if (push_operand (op0, mode)
8758 && ! general_no_elim_operand (op1, mode))
8759 op1 = copy_to_mode_reg (mode, op1);
8761 /* Force large constants in 64bit compilation into register
8762 to get them CSEed. */
8763 if (TARGET_64BIT && mode == DImode
8764 && immediate_operand (op1, mode)
8765 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8766 && !register_operand (op0, mode)
8767 && optimize && !reload_completed && !reload_in_progress)
8768 op1 = copy_to_mode_reg (mode, op1);
8770 if (FLOAT_MODE_P (mode))
8772 /* If we are loading a floating point constant to a register,
8773 force the value to memory now, since we'll get better code
8774 out the back end. */
8778 else if (GET_CODE (op1) == CONST_DOUBLE)
8780 op1 = validize_mem (force_const_mem (mode, op1));
8781 if (!register_operand (op0, mode))
8783 rtx temp = gen_reg_rtx (mode);
8784 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8785 emit_move_insn (op0, temp);
8792 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move, forcing non-zero constants to the
   constant pool and avoiding mem->mem moves.  */
8796 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8798 rtx op0 = operands[0], op1 = operands[1];
8800 /* Force constants other than zero into memory. We do not know how
8801 the instructions used to build constants modify the upper 64 bits
8802 of the register, once we have that information we may be able
8803 to handle some of them more efficiently. */
8804 if ((reload_in_progress | reload_completed) == 0
8805 && register_operand (op0, mode)
8806 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
8807 op1 = validize_mem (force_const_mem (mode, op1));
8809 /* Make operand1 a register if it isn't already. */
8811 && !register_operand (op0, mode)
8812 && !register_operand (op1, mode)
8814 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
8818 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8821 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
8822 straight to ix86_expand_vector_move. */
8825 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* Load side (op1 in memory).  NOTE(review): the MEM_P(op1) guard and
   several braces are elided from this listing.  */
8834 /* If we're optimizing for size, movups is the smallest. */
8837 op0 = gen_lowpart (V4SFmode, op0);
8838 op1 = gen_lowpart (V4SFmode, op1);
8839 emit_insn (gen_sse_movups (op0, op1));
8843 /* ??? If we have typed data, then it would appear that using
8844 movdqu is the only way to get unaligned data loaded with
8846 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8848 op0 = gen_lowpart (V16QImode, op0);
8849 op1 = gen_lowpart (V16QImode, op1);
8850 emit_insn (gen_sse2_movdqu (op0, op1));
8854 if (TARGET_SSE2 && mode == V2DFmode)
8858 /* When SSE registers are split into halves, we can avoid
8859 writing to the top half twice. */
8860 if (TARGET_SSE_SPLIT_REGS)
8862 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8867 /* ??? Not sure about the best option for the Intel chips.
8868 The following would seem to satisfy; the register is
8869 entirely cleared, breaking the dependency chain. We
8870 then store to the upper half, with a dependency depth
8871 of one. A rumor has it that Intel recommends two movsd
8872 followed by an unpacklpd, but this is unconfirmed. And
8873 given that the dependency depth of the unpacklpd would
8874 still be one, I'm not sure why this would be better. */
8875 zero = CONST0_RTX (V2DFmode);
/* Load the two doubles with loadlpd/loadhpd.  */
8878 m = adjust_address (op1, DFmode, 0);
8879 emit_insn (gen_sse2_loadlpd (op0, zero, m));
8880 m = adjust_address (op1, DFmode, 8);
8881 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* SFmode-class fallback: break the dependency (movaps of zero or a
   clobber), then loadlps/loadhps halves.  */
8885 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
8886 emit_move_insn (op0, CONST0_RTX (mode));
8888 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8890 if (mode != V4SFmode)
8891 op0 = gen_lowpart (V4SFmode, op0);
8892 m = adjust_address (op1, V2SFmode, 0);
8893 emit_insn (gen_sse_loadlps (op0, op0, m));
8894 m = adjust_address (op1, V2SFmode, 8);
8895 emit_insn (gen_sse_loadhps (op0, op0, m));
8898 else if (MEM_P (op0))
8900 /* If we're optimizing for size, movups is the smallest. */
8903 op0 = gen_lowpart (V4SFmode, op0);
8904 op1 = gen_lowpart (V4SFmode, op1);
8905 emit_insn (gen_sse_movups (op0, op1));
8909 /* ??? Similar to above, only less clear because of quote
8910 typeless stores unquote. */
8911 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
8912 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8914 op0 = gen_lowpart (V16QImode, op0);
8915 op1 = gen_lowpart (V16QImode, op1);
8916 emit_insn (gen_sse2_movdqu (op0, op1));
8920 if (TARGET_SSE2 && mode == V2DFmode)
8922 m = adjust_address (op0, DFmode, 0);
8923 emit_insn (gen_sse2_storelpd (m, op1));
8924 m = adjust_address (op0, DFmode, 8);
8925 emit_insn (gen_sse2_storehpd (m, op1));
8929 if (mode != V4SFmode)
8930 op1 = gen_lowpart (V4SFmode, op1);
8931 m = adjust_address (op0, V2SFmode, 0);
8932 emit_insn (gen_sse_storelps (m, op1));
8933 m = adjust_address (op0, V2SFmode, 8);
8934 emit_insn (gen_sse_storehps (m, op1));
8941 /* Expand a push in MODE. This is some mode for which we do not support
8942 proper push instructions, at least from the registers that we expect
8943 the value to live in. */
8946 ix86_expand_push (enum machine_mode mode, rtx x)
/* Manually decrement the stack pointer, then store X at the new top.  */
8950 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
8951 GEN_INT (-GET_MODE_SIZE (mode)),
8952 stack_pointer_rtx, 1, OPTAB_DIRECT);
8953 if (tmp != stack_pointer_rtx)
8954 emit_move_insn (stack_pointer_rtx, tmp);
8956 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
8957 emit_move_insn (tmp, x);
8960 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
8961 destination to use for the operation. If different from the true
8962 destination in operands[0], a copy operation will be required. */
8965 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
8968 int matching_memory;
8969 rtx src1, src2, dst;
8975 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8976 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8977 && (rtx_equal_p (dst, src2)
8978 || immediate_operand (src1, mode)))
8985 /* If the destination is memory, and we do not have matching source
8986 operands, do things in registers. */
8987 matching_memory = 0;
8988 if (GET_CODE (dst) == MEM)
8990 if (rtx_equal_p (dst, src1))
8991 matching_memory = 1;
8992 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8993 && rtx_equal_p (dst, src2))
8994 matching_memory = 2;
8996 dst = gen_reg_rtx (mode);
8999 /* Both source operands cannot be in memory. */
9000 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
/* Keep whichever source already matches the memory destination.  */
9002 if (matching_memory != 2)
9003 src2 = force_reg (mode, src2);
9005 src1 = force_reg (mode, src1);
9008 /* If the operation is not commutable, source 1 cannot be a constant
9009 or non-matching memory. */
9010 if ((CONSTANT_P (src1)
9011 || (!matching_memory && GET_CODE (src1) == MEM))
9012 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9013 src1 = force_reg (mode, src1);
9015 src1 = operands[1] = src1;
9016 src2 = operands[2] = src2;
9020 /* Similarly, but assume that the destination has already been
/* Thin checked wrapper: the fixup must not substitute a new dst.  */
9024 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9025 enum machine_mode mode, rtx operands[])
9027 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9028 gcc_assert (dst == operands[0]);
9031 /* Attempt to expand a binary operator. Make the expansion closer to the
9032 actual machine, then just general_operand, which will allow 3 separate
9033 memory references (one output, two input) in a single insn. */
9036 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9039 rtx src1, src2, dst, op, clob;
9041 dst = ix86_fixup_binary_operands (code, mode, operands);
9045 /* Emit the instruction. */
9047 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9048 if (reload_in_progress)
9050 /* Reload doesn't know about the flags register, and doesn't know that
9051 it doesn't want to clobber it. We can only do this with PLUS. */
9052 gcc_assert (code == PLUS);
/* Normal path: attach the flags clobber most x86 ALU insns require.  */
9057 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9058 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9061 /* Fix up the destination if needed. */
9062 if (dst != operands[0])
9063 emit_move_insn (operands[0], dst);
9066 /* Return TRUE or FALSE depending on whether the binary operator meets the
9067 appropriate constraints. */
9070 ix86_binary_operator_ok (enum rtx_code code,
9071 enum machine_mode mode ATTRIBUTE_UNUSED,
9074 /* Both source operands cannot be in memory. */
9075 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9077 /* If the operation is not commutable, source 1 cannot be a constant. */
9078 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9080 /* If the destination is memory, we must have a matching source operand. */
9081 if (GET_CODE (operands[0]) == MEM
9082 && ! (rtx_equal_p (operands[0], operands[1])
9083 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9084 && rtx_equal_p (operands[0], operands[2]))))
9086 /* If the operation is not commutable and the source 1 is memory, we must
9087 have a matching destination. */
9088 if (GET_CODE (operands[1]) == MEM
9089 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9090 && ! rtx_equal_p (operands[0], operands[1]))
9095 /* Attempt to expand a unary operator. Make the expansion closer to the
9096 actual machine, then just general_operand, which will allow 2 separate
9097 memory references (one output, one input) in a single insn. */
9100 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9103 int matching_memory;
9104 rtx src, dst, op, clob;
9109 /* If the destination is memory, and we do not have matching source
9110 operands, do things in registers. */
9111 matching_memory = 0;
9114 if (rtx_equal_p (dst, src))
9115 matching_memory = 1;
9117 dst = gen_reg_rtx (mode);
9120 /* When source operand is memory, destination must match. */
9121 if (MEM_P (src) && !matching_memory)
9122 src = force_reg (mode, src);
9124 /* Emit the instruction. */
9126 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9127 if (reload_in_progress || code == NOT)
9129 /* Reload doesn't know about the flags register, and doesn't know that
9130 it doesn't want to clobber it. */
9131 gcc_assert (code == NOT);
/* Other unary ops (e.g. NEG) clobber flags; say so explicitly.  */
9136 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9137 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9140 /* Fix up the destination if needed. */
9141 if (dst != operands[0])
9142 emit_move_insn (operands[0], dst);
9145 /* Return TRUE or FALSE depending on whether the unary operator meets the
9146 appropriate constraints. */
9149 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9150 enum machine_mode mode ATTRIBUTE_UNUSED,
9151 rtx operands[2] ATTRIBUTE_UNUSED)
9153 /* If one of operands is memory, source and destination must match. */
9154 if ((GET_CODE (operands[0]) == MEM
9155 || GET_CODE (operands[1]) == MEM)
9156 && ! rtx_equal_p (operands[0], operands[1]))
9161 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9162 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9163 true, then replicate the mask for all elements of the vector register.
9164 If INVERT is true, then create a mask excluding the sign bit. */
9167 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9169 enum machine_mode vec_mode;
9170 HOST_WIDE_INT hi, lo;
9175 /* Find the sign bit, sign extended to 2*HWI. */
9177 lo = 0x80000000, hi = lo < 0;
9178 else if (HOST_BITS_PER_WIDE_INT >= 64)
9179 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9181 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9186 /* Force this value into the low part of a fp vector constant. */
9187 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9188 mask = gen_lowpart (mode, mask);
/* Build a V4SF or V2DF constant vector: replicated mask when VECT,
   otherwise mask in element 0 and zeros elsewhere.  */
9193 v = gen_rtvec (4, mask, mask, mask, mask);
9195 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9196 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9197 vec_mode = V4SFmode;
9202 v = gen_rtvec (2, mask, mask);
9204 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9205 vec_mode = V2DFmode;
9208 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9211 /* Generate code for floating point ABS or NEG. */
9214 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9217 rtx mask, set, use, clob, dst, src;
9218 bool matching_memory;
9219 bool use_sse = false;
9220 bool vector_mode = VECTOR_MODE_P (mode);
9221 enum machine_mode elt_mode = mode;
9225 elt_mode = GET_MODE_INNER (mode);
9228 else if (TARGET_SSE_MATH)
9229 use_sse = SSE_FLOAT_MODE_P (mode);
9231 /* NEG and ABS performed with SSE use bitwise mask operations.
9232 Create the appropriate mask now. */
9234 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9237 /* When not using SSE, we don't use the mask, but prefer to keep the
9238 same general form of the insn pattern to reduce duplication when
9239 it comes time to split. */
9246 /* If the destination is memory, and we don't have matching source
9247 operands, do things in registers. */
9248 matching_memory = false;
9251 if (rtx_equal_p (dst, src))
9252 matching_memory = true;
9254 dst = gen_reg_rtx (mode);
9256 if (MEM_P (src) && !matching_memory)
9257 src = force_reg (mode, src);
/* Vector path: a plain XOR (for NEG) or AND (for ABS) with the mask.  */
9261 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9262 set = gen_rtx_SET (VOIDmode, dst, set);
/* Scalar path: keep the abs/neg rtx shape, with USE of the mask and a
   flags clobber so the splitter can pick SSE or x87 later.  */
9267 set = gen_rtx_fmt_e (code, mode, src);
9268 set = gen_rtx_SET (VOIDmode, dst, set);
9269 use = gen_rtx_USE (VOIDmode, mask);
9270 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9271 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
9274 if (dst != operands[0])
9275 emit_move_insn (operands[0], dst);
9278 /* Expand a copysign operation. Special case operand 0 being a constant. */
9281 ix86_expand_copysign (rtx operands[])
9283 enum machine_mode mode, vmode;
9284 rtx dest, op0, op1, mask, nmask;
9290 mode = GET_MODE (dest);
9291 vmode = mode == SFmode ? V4SFmode : V2DFmode;
/* Constant magnitude: strip its sign, widen to a one-element vector,
   and use the _const pattern with a single sign-bit mask.  */
9293 if (GET_CODE (op0) == CONST_DOUBLE)
9297 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9298 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9300 if (op0 == CONST0_RTX (mode))
9301 op0 = CONST0_RTX (vmode);
9305 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9306 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9308 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9309 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9312 mask = ix86_build_signbit_mask (mode, 0, 0);
9315 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9317 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
/* Variable magnitude: needs both the sign mask and its complement.  */
9321 nmask = ix86_build_signbit_mask (mode, 0, 1);
9322 mask = ix86_build_signbit_mask (mode, 0, 0);
9325 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9327 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9331 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9332 be a constant, and so has already been expanded into a vector constant. */
9335 ix86_split_copysign_const (rtx operands[])
9337 enum machine_mode mode, vmode;
9338 rtx dest, op0, op1, mask, x;
9345 mode = GET_MODE (dest);
9346 vmode = GET_MODE (mask);
/* dest = (op1 & sign-mask) | |op0| -- the IOR is skipped when the
   magnitude constant is zero.  */
9348 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9349 x = gen_rtx_AND (vmode, dest, mask);
9350 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9352 if (op0 != CONST0_RTX (vmode))
9354 x = gen_rtx_IOR (vmode, dest, op0);
9355 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9359 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9360 so we have to do two masks. */
9363 ix86_split_copysign_var (rtx operands[])
9365 enum machine_mode mode, vmode;
9366 rtx dest, scratch, op0, op1, mask, nmask, x;
9369 scratch = operands[1];
9372 nmask = operands[4];
9375 mode = GET_MODE (dest);
9376 vmode = GET_MODE (mask);
9378 if (rtx_equal_p (op0, op1))
9380 /* Shouldn't happen often (it's useless, obviously), but when it does
9381 we'd generate incorrect code if we continue below. */
9382 emit_move_insn (dest, op0);
/* The register-allocation alternatives of the _var pattern decide
   which operand shares a hard register with dest/scratch; each branch
   below computes sign = op1 & mask and magnitude = op0 & ~mask using
   the overlap the allocator picked.  */
9386 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9388 gcc_assert (REGNO (op1) == REGNO (scratch));
9390 x = gen_rtx_AND (vmode, scratch, mask);
9391 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9394 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9395 x = gen_rtx_NOT (vmode, dest);
9396 x = gen_rtx_AND (vmode, x, op0);
9397 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9401 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9403 x = gen_rtx_AND (vmode, scratch, mask);
9405 else /* alternative 2,4 */
9407 gcc_assert (REGNO (mask) == REGNO (scratch));
9408 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9409 x = gen_rtx_AND (vmode, scratch, op1);
9411 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9413 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9415 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9416 x = gen_rtx_AND (vmode, dest, nmask);
9418 else /* alternative 3,4 */
9420 gcc_assert (REGNO (nmask) == REGNO (dest));
9422 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9423 x = gen_rtx_AND (vmode, dest, op0);
9425 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine sign and magnitude.  */
9428 x = gen_rtx_IOR (vmode, dest, scratch);
9429 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9432 /* Return TRUE or FALSE depending on whether the first SET in INSN
9433 has source and destination with matching CC modes, and that the
9434 CC mode is at least as constrained as REQ_MODE. */
9437 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9440 enum machine_mode set_mode;
9442 set = PATTERN (insn);
9443 if (GET_CODE (set) == PARALLEL)
9444 set = XVECEXP (set, 0, 0);
9445 gcc_assert (GET_CODE (set) == SET);
9446 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9448 set_mode = GET_MODE (SET_DEST (set));
/* Per-mode compatibility checks.  NOTE(review): the switch skeleton is
   elided; only the individual mode tests are visible.  */
9452 if (req_mode != CCNOmode
9453 && (req_mode != CCmode
9454 || XEXP (SET_SRC (set), 1) != const0_rtx))
9458 if (req_mode == CCGCmode)
9462 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9466 if (req_mode == CCZmode)
9476 return (GET_MODE (SET_SRC (set)) == set_mode);
9479 /* Generate insn patterns to do an integer compare of OPERANDS. */
9482 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9484 enum machine_mode cmpmode;
9487 cmpmode = SELECT_CC_MODE (code, op0, op1);
9488 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9490 /* This is very simple, but making the interface the same as in the
9491 FP case makes the rest of the code easier. */
9492 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9493 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9495 /* Return the test that should be put into the flags user, i.e.
9496 the bcc, scc, or cmov instruction. */
9497 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9500 /* Figure out whether to use ordered or unordered fp comparisons.
9501 Return the appropriate mode to use. */
9504 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9506 /* ??? In order to make all comparisons reversible, we do all comparisons
9507 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9508 all forms trapping and nontrapping comparisons, we can make inequality
9509 comparisons trapping again, since it results in better code when using
9510 FCOM based compares. */
9511 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode for comparing OP0 and OP1 with CODE:
   FP modes delegate to ix86_fp_compare_mode; integer codes pick the
   least constrained CC mode whose flags suffice.  NOTE(review): the
   switch skeleton and return statements are elided from this listing.  */
9515 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9517 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9518 return ix86_fp_compare_mode (code);
9521 /* Only zero flag is needed. */
9523 case NE: /* ZF!=0 */
9525 /* Codes needing carry flag. */
9526 case GEU: /* CF=0 */
9527 case GTU: /* CF=0 & ZF=0 */
9528 case LTU: /* CF=1 */
9529 case LEU: /* CF=1 | ZF=1 */
9531 /* Codes possibly doable only with sign flag when
9532 comparing against zero. */
9533 case GE: /* SF=OF or SF=0 */
9534 case LT: /* SF<>OF or SF=1 */
9535 if (op1 == const0_rtx)
9538 /* For other cases Carry flag is not required. */
9540 /* Codes doable only with sign flag when comparing
9541 against zero, but we miss jump instruction for it
9542 so we need to use relational tests against overflow
9543 that thus needs to be zero. */
9544 case GT: /* ZF=0 & SF=OF */
9545 case LE: /* ZF=1 | SF<>OF */
9546 if (op1 == const0_rtx)
9550 /* strcmp pattern do (use flags) and combine may ask us for proper
9559 /* Return the fixed registers used for condition codes. */
/* NOTE(review): the body is elided from this listing; only the
   signature (out-params P1/P2 for the two CC registers) is visible.  */
9562 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9569 /* If two condition code modes are compatible, return a condition code
9570 mode which is compatible with both. Otherwise, return
9573 static enum machine_mode
9574 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Non-CC modes are never compatible; CCGC and CCGOC unify.
   NOTE(review): the return statements and remaining cases are elided.  */
9579 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9582 if ((m1 == CCGCmode && m2 == CCGOCmode)
9583 || (m1 == CCGOCmode && m2 == CCGCmode))
9611 /* These are only compatible with themselves, which we already
9617 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9620 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9622 enum rtx_code swapped_code = swap_condition (code);
/* Use fcomi when it is no more expensive than the best alternative for
   either operand order.  */
9623 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9624 || (ix86_fp_comparison_cost (swapped_code)
9625 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9628 /* Swap, force into registers, or otherwise massage the two operands
9629 to a fp comparison. The operands are updated in place; the new
9630 comparison code is returned. */
9632 static enum rtx_code
9633 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9635 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9636 rtx op0 = *pop0, op1 = *pop1;
9637 enum machine_mode op_mode = GET_MODE (op0);
9638 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9640 /* All of the unordered compare instructions only work on registers.
9641 The same is true of the fcomi compare instructions. The XFmode
9642 compare instructions require registers except when comparing
9643 against zero or when converting operand 1 from fixed point to
9647 && (fpcmp_mode == CCFPUmode
9648 || (op_mode == XFmode
9649 && ! (standard_80387_constant_p (op0) == 1
9650 || standard_80387_constant_p (op1) == 1)
9651 && GET_CODE (op1) != FLOAT)
9652 || ix86_use_fcomi_compare (code)))
9654 op0 = force_reg (op_mode, op0);
9655 op1 = force_reg (op_mode, op1);
9659 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9660 things around if they appear profitable, otherwise force op0
9663 if (standard_80387_constant_p (op0) == 0
9664 || (GET_CODE (op0) == MEM
9665 && ! (standard_80387_constant_p (op1) == 0
9666 || GET_CODE (op1) == MEM)))
9669 tmp = op0, op0 = op1, op1 = tmp;
9670 code = swap_condition (code);
9673 if (GET_CODE (op0) != REG)
9674 op0 = force_reg (op_mode, op0);
/* Constants: pool non-standard-387 constants into memory, keep the
   standard ones (loadable by fld1/fldz etc.) as-is, register the rest.  */
9676 if (CONSTANT_P (op1))
9678 int tmp = standard_80387_constant_p (op1);
9680 op1 = validize_mem (force_const_mem (op_mode, op1));
9684 op1 = force_reg (op_mode, op1);
9687 op1 = force_reg (op_mode, op1);
9691 /* Try to rearrange the comparison to make it cheaper. */
9692 if (ix86_fp_comparison_cost (code)
9693 > ix86_fp_comparison_cost (swap_condition (code))
9694 && (GET_CODE (op1) == REG || !no_new_pseudos))
9697 tmp = op0, op0 = op1, op1 = tmp;
9698 code = swap_condition (code);
9699 if (GET_CODE (op0) != REG)
9700 op0 = force_reg (op_mode, op0);
9708 /* Convert comparison codes we use to represent FP comparison to integer
9709 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the mapping switch is entirely elided from this
   listing; only the signature is visible.  */
9713 ix86_fp_compare_code_to_integer (enum rtx_code code)
9742 /* Split comparison code CODE into comparisons we can do using branch
9743 instructions. BYPASS_CODE is comparison code for branch that will
9744 branch around FIRST_CODE and SECOND_CODE. If some of branches
9745 is not required, set value to UNKNOWN.
9746 We never require more than two branches. */
9749 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9750 enum rtx_code *first_code,
9751 enum rtx_code *second_code)
9754 *bypass_code = UNKNOWN;
9755 *second_code = UNKNOWN;
9757 /* The fcomi comparison sets flags as follows:
/* Single-branch codes: these work even on unordered inputs.
   NOTE(review): the switch skeleton and *first_code assignments are
   elided from this listing.  */
9767 case GT: /* GTU - CF=0 & ZF=0 */
9768 case GE: /* GEU - CF=0 */
9769 case ORDERED: /* PF=0 */
9770 case UNORDERED: /* PF=1 */
9771 case UNEQ: /* EQ - ZF=1 */
9772 case UNLT: /* LTU - CF=1 */
9773 case UNLE: /* LEU - CF=1 | ZF=1 */
9774 case LTGT: /* EQ - ZF=0 */
9776 case LT: /* LTU - CF=1 - fails on unordered */
9778 *bypass_code = UNORDERED;
9780 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9782 *bypass_code = UNORDERED;
9784 case EQ: /* EQ - ZF=1 - fails on unordered */
9786 *bypass_code = UNORDERED;
9788 case NE: /* NE - ZF=0 - fails on unordered */
9790 *second_code = UNORDERED;
9792 case UNGE: /* GEU - CF=0 - fails on unordered */
9794 *second_code = UNORDERED;
9796 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9798 *second_code = UNORDERED;
/* Without IEEE-strict FP we may ignore the unordered case entirely.  */
9803 if (!TARGET_IEEE_FP)
9805 *second_code = UNKNOWN;
9806 *bypass_code = UNKNOWN;
9810 /* Return cost of comparison done fcom + arithmetics operations on AX.
9811 All following functions do use number of instructions as a cost metrics.
9812 In future this should be tweaked to compute bytes for optimize_size and
9813 take into account performance of various instructions on various CPUs. */
9815 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9817 if (!TARGET_IEEE_FP)
9819 /* The cost of code output by ix86_expand_fp_compare. */
9847 /* Return cost of comparison done using fcomi operation.
9848 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9850 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9852 enum rtx_code bypass_code, first_code, second_code;
9853 /* Return arbitrarily high cost when instruction is not supported - this
9854 prevents gcc from using it. */
/* NOTE(review): the TARGET_CMOVE guard implied by the comment above is
   elided from this excerpt.  */
9857 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost of 2 (fcomi + jump), plus 1 if an extra branch is needed.  */
9858 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9861 /* Return cost of comparison done using sahf operation.
9862 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9864 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9866 enum rtx_code bypass_code, first_code, second_code;
9867 /* Return arbitrarily high cost when instruction is not preferred - this
9868 avoids gcc from using it. */
9869 if (!TARGET_USE_SAHF && !optimize_size)
9871 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost of 3 (fnstsw + sahf + jump), plus 1 for an extra branch.  */
9872 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9875 /* Compute cost of the comparison done using any method.
9876 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9878 ix86_fp_comparison_cost (enum rtx_code code)
9880 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9883 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9884 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum over the three strategies; the winning strategy is
   re-derived the same way by ix86_expand_fp_compare.  */
9886 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9887 if (min > sahf_cost)
9889 if (min > fcomi_cost)
/* NOTE(review): line-sampled excerpt -- braces, else branches and several
   statements between the visible lines are elided; read alongside the full
   file before modifying.  */
9894 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9897 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9898 rtx *second_test, rtx *bypass_test)
9900 enum machine_mode fpcmp_mode, intcmp_mode;
9902 int cost = ix86_fp_comparison_cost (code);
9903 enum rtx_code bypass_code, first_code, second_code;
9905 fpcmp_mode = ix86_fp_compare_mode (code);
9906 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9909 *second_test = NULL_RTX;
9911 *bypass_test = NULL_RTX;
9913 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9915 /* Do fcomi/sahf based test when profitable. */
/* Only usable when the caller can receive the extra tests (or none are
   needed) and flags-based code beats the arithmetic fallback.  */
9916 if ((bypass_code == UNKNOWN || bypass_test)
9917 && (second_code == UNKNOWN || second_test)
9918 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi path: compare straight into the flags register.  */
9922 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9923 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw into a scratch, then sahf to load AH into EFLAGS.  */
9929 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9930 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9932 scratch = gen_reg_rtx (HImode);
9933 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9934 emit_insn (gen_x86_sahf_1 (scratch));
9937 /* The FP codes work out to act like unsigned. */
9938 intcmp_mode = fpcmp_mode;
9940 if (bypass_code != UNKNOWN)
9941 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9942 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9944 if (second_code != UNKNOWN)
9945 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9946 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Arithmetic fallback: fnstsw + bit tests on AH.  */
9951 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9952 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9953 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9955 scratch = gen_reg_rtx (HImode);
9956 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9958 /* In the unordered case, we have to check C2 for NaN's, which
9959 doesn't happen to work out to anything nice combination-wise.
9960 So do some bit twiddling on the value we've got in AH to come
9961 up with an appropriate set of condition codes. */
/* 0x45 masks the C0/C2/C3 condition bits of the x87 status word as seen
   in AH -- presumably the standard fnstsw layout; confirm against the
   Intel SDM.  */
9963 intcmp_mode = CCNOmode;
9968 if (code == GT || !TARGET_IEEE_FP)
9970 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9975 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9976 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9977 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9978 intcmp_mode = CCmode;
9984 if (code == LT && TARGET_IEEE_FP)
9986 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9987 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9988 intcmp_mode = CCmode;
9993 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9999 if (code == GE || !TARGET_IEEE_FP)
10001 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10006 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10007 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10014 if (code == LE && TARGET_IEEE_FP)
10016 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10017 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10018 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10019 intcmp_mode = CCmode;
10024 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10030 if (code == EQ && TARGET_IEEE_FP)
10032 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10033 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10034 intcmp_mode = CCmode;
10039 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10046 if (code == NE && TARGET_IEEE_FP)
10048 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10049 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10055 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10061 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10065 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10070 gcc_unreachable ();
10074 /* Return the test that should be put into the flags user, i.e.
10075 the bcc, scc, or cmov instruction. */
10076 return gen_rtx_fmt_ee (code, VOIDmode,
10077 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Top-level comparison expander: dispatches between an already-emitted
   comparison, the FP path and the integer path.  Reads the global
   ix86_compare_op0/op1 operands.  */
10082 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10085 op0 = ix86_compare_op0;
10086 op1 = ix86_compare_op1;
10089 *second_test = NULL_RTX;
10091 *bypass_test = NULL_RTX;
/* If a comparison was already emitted, just build the condition on its
   result and clear the one-shot global.  */
10093 if (ix86_compare_emitted)
10095 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10096 ix86_compare_emitted = NULL_RTX;
10098 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10099 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10100 second_test, bypass_test);
10102 ret = ix86_expand_int_compare (code, op0, op1);
10107 /* Return true if the CODE will result in nontrivial jump sequence. */
10109 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10111 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial means more than one branch is needed (bypass or second).  */
10114 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10115 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional branch on CODE to LABEL, using the global
   ix86_compare_op0/op1.  Dispatches on operand mode: simple integer
   modes, FP modes, and double-word (DI/TI) modes split into word-sized
   compare+branch sequences.
   NOTE(review): line-sampled excerpt -- the switch case labels, braces and
   some statements are elided between the visible lines.  */
10119 ix86_expand_branch (enum rtx_code code, rtx label)
10123 switch (GET_MODE (ix86_compare_op0))
/* Simple scalar case: one compare and one conditional jump.  */
10129 tmp = ix86_expand_compare (code, NULL, NULL);
10130 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10131 gen_rtx_LABEL_REF (VOIDmode, label),
10133 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* FP case.  */
10142 enum rtx_code bypass_code, first_code, second_code;
10144 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10145 &ix86_compare_op1);
10147 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10149 /* Check whether we will use the natural sequence with one jump. If
10150 so, we can expand jump early. Otherwise delay expansion by
10151 creating compound insn to not confuse optimizers. */
10152 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10155 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10156 gen_rtx_LABEL_REF (VOIDmode, label),
10157 pc_rtx, NULL_RTX, NULL_RTX);
/* Multi-branch FP case: emit one PARALLEL carrying the compare plus the
   flag/scratch clobbers, to be split after reload.  */
10161 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10162 ix86_compare_op0, ix86_compare_op1);
10163 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10164 gen_rtx_LABEL_REF (VOIDmode, label),
10166 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10168 use_fcomi = ix86_use_fcomi_compare (code);
10169 vec = rtvec_alloc (3 + !use_fcomi);
10170 RTVEC_ELT (vec, 0) = tmp;
/* Hard register numbers 18/17 here are the FP/flags condition registers
   of this backend -- confirm against the register definitions.  */
10172 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10174 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10177 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10179 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10188 /* Expand DImode branch into multiple compare+branch. */
10190 rtx lo[2], hi[2], label2;
10191 enum rtx_code code1, code2, code3;
10192 enum machine_mode submode;
/* Canonicalize: constant goes second.  */
10194 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10196 tmp = ix86_compare_op0;
10197 ix86_compare_op0 = ix86_compare_op1;
10198 ix86_compare_op1 = tmp;
10199 code = swap_condition (code);
10201 if (GET_MODE (ix86_compare_op0) == DImode)
10203 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10204 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10209 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10210 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10214 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10215 avoid two branches. This costs one extra insn, so disable when
10216 optimizing for size. */
10218 if ((code == EQ || code == NE)
10220 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10225 if (hi[1] != const0_rtx)
10226 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10227 NULL_RTX, 0, OPTAB_WIDEN);
10230 if (lo[1] != const0_rtx)
10231 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10232 NULL_RTX, 0, OPTAB_WIDEN);
10234 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10235 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the single-word OR-of-XORs against zero.  */
10237 ix86_compare_op0 = tmp;
10238 ix86_compare_op1 = const0_rtx;
10239 ix86_expand_branch (code, label);
10243 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10244 op1 is a constant and the low word is zero, then we can just
10245 examine the high word. */
10247 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10250 case LT: case LTU: case GE: case GEU:
10251 ix86_compare_op0 = hi[0];
10252 ix86_compare_op1 = hi[1];
10253 ix86_expand_branch (code, label);
10259 /* Otherwise, we need two or three jumps. */
10261 label2 = gen_label_rtx ();
10264 code2 = swap_condition (code);
10265 code3 = unsigned_condition (code);
/* Choose the high-word branch codes per comparison kind.  */
10269 case LT: case GT: case LTU: case GTU:
10272 case LE: code1 = LT; code2 = GT; break;
10273 case GE: code1 = GT; code2 = LT; break;
10274 case LEU: code1 = LTU; code2 = GTU; break;
10275 case GEU: code1 = GTU; code2 = LTU; break;
10277 case EQ: code1 = UNKNOWN; code2 = NE; break;
10278 case NE: code2 = UNKNOWN; break;
10281 gcc_unreachable ();
10286 * if (hi(a) < hi(b)) goto true;
10287 * if (hi(a) > hi(b)) goto false;
10288 * if (lo(a) < lo(b)) goto true;
10292 ix86_compare_op0 = hi[0];
10293 ix86_compare_op1 = hi[1];
10295 if (code1 != UNKNOWN)
10296 ix86_expand_branch (code1, label);
10297 if (code2 != UNKNOWN)
10298 ix86_expand_branch (code2, label2);
/* Low word is always compared unsigned (code3).  */
10300 ix86_compare_op0 = lo[0];
10301 ix86_compare_op1 = lo[1];
10302 ix86_expand_branch (code3, label);
10304 if (code2 != UNKNOWN)
10305 emit_label (label2);
10310 gcc_unreachable ();
10314 /* Split branch based on floating point condition. */
/* Emits up to three jumps (bypass on unordered, main condition, second
   condition) and attaches REG_BR_PROB notes when the split probability
   is known.  NOTE(review): line-sampled excerpt; braces and some
   arguments are elided between visible lines.  */
10316 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10317 rtx target1, rtx target2, rtx tmp, rtx pushed)
10319 rtx second, bypass;
10320 rtx label = NULL_RTX;
10322 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the fall-through edge is target2 (pc_rtx).  */
10325 if (target2 != pc_rtx)
10328 code = reverse_condition_maybe_unordered (code);
10333 condition = ix86_expand_fp_compare (code, op1, op2,
10334 tmp, &second, &bypass);
10336 /* Remove pushed operand from stack. */
10338 ix86_free_from_memory (GET_MODE (pushed));
10340 if (split_branch_probability >= 0)
10342 /* Distribute the probabilities across the jumps.
10343 Assume the BYPASS and SECOND to be always test
10345 probability = split_branch_probability;
10347 /* Value of 1 is low enough to make no need for probability
10348 to be updated. Later we may run some experiments and see
10349 if unordered values are more frequent in practice. */
10351 bypass_probability = 1;
10353 second_probability = 1;
/* Bypass jump: skip the main test entirely on unordered inputs.  */
10355 if (bypass != NULL_RTX)
10357 label = gen_label_rtx ();
10358 i = emit_jump_insn (gen_rtx_SET
10360 gen_rtx_IF_THEN_ELSE (VOIDmode,
10362 gen_rtx_LABEL_REF (VOIDmode,
10365 if (bypass_probability >= 0)
10367 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10368 GEN_INT (bypass_probability),
/* Main conditional jump.  */
10371 i = emit_jump_insn (gen_rtx_SET
10373 gen_rtx_IF_THEN_ELSE (VOIDmode,
10374 condition, target1, target2)));
10375 if (probability >= 0)
10377 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10378 GEN_INT (probability),
/* Optional second jump for codes needing two branches.  */
10380 if (second != NULL_RTX)
10382 i = emit_jump_insn (gen_rtx_SET
10384 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10386 if (second_probability >= 0)
10388 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10389 GEN_INT (second_probability),
10392 if (label != NULL_RTX)
10393 emit_label (label);
/* Expand a setcc of comparison CODE into QImode register DEST.
   Returns 1 on success, 0 on failure (double-word modes).  When the FP
   compare needs a second or bypass test, combine the two QImode setcc
   results with AND/OR.  */
10397 ix86_expand_setcc (enum rtx_code code, rtx dest)
10399 rtx ret, tmp, tmpreg, equiv;
10400 rtx second_test, bypass_test;
/* Double-word compares go through the branch splitter, not setcc.  */
10402 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10403 return 0; /* FAIL */
10405 gcc_assert (GET_MODE (dest) == QImode);
10407 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10408 PUT_MODE (ret, QImode);
10413 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10414 if (bypass_test || second_test)
10416 rtx test = second_test;
10418 rtx tmp2 = gen_reg_rtx (QImode);
/* At most one of the two auxiliary tests can be set.  */
10421 gcc_assert (!second_test);
10422 test = bypass_test;
/* The bypass test is combined inverted (AND of reversed condition).  */
10424 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10426 PUT_MODE (test, QImode);
10427 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10430 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10432 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10435 /* Attach a REG_EQUAL note describing the comparison result. */
10436 if (ix86_compare_op0 && ix86_compare_op1)
10438 equiv = simplify_gen_relational (code, QImode,
10439 GET_MODE (ix86_compare_op0),
10440 ix86_compare_op0, ix86_compare_op1);
10441 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10444 return 1; /* DONE */
10447 /* Expand comparison setting or clearing carry flag. Return true when
10448 successful and set pop for the operation. */
/* Used by the sbb/adc-based conditional move expansion: rewrites CODE so
   the result can be read directly from the carry flag (LTU/GEU).
   NOTE(review): line-sampled excerpt -- switch headers, returns and
   braces are elided between visible lines.  */
10450 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10452 enum machine_mode mode =
10453 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10455 /* Do not handle DImode compares that go trought special path. Also we can't
10456 deal with FP compares yet. This is possible to add. */
10457 if (mode == (TARGET_64BIT ? TImode : DImode))
10459 if (FLOAT_MODE_P (mode))
10461 rtx second_test = NULL, bypass_test = NULL;
10462 rtx compare_op, compare_seq;
10464 /* Shortcut: following common codes never translate into carry flag compares. */
10465 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10466 || code == ORDERED || code == UNORDERED)
10469 /* These comparisons require zero flag; swap operands so they won't. */
10470 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10471 && !TARGET_IEEE_FP)
10476 code = swap_condition (code);
10479 /* Try to expand the comparison and verify that we end up with carry flag
10480 based comparison. This is fails to be true only when we decide to expand
10481 comparison using arithmetic that is not too common scenario. */
10483 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10484 &second_test, &bypass_test);
10485 compare_seq = get_insns ();
/* Need exactly one test; otherwise carry alone cannot express it.  */
10488 if (second_test || bypass_test)
10490 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10491 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10492 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10494 code = GET_CODE (compare_op);
10495 if (code != LTU && code != GEU)
10497 emit_insn (compare_seq);
10501 if (!INTEGRAL_MODE_P (mode))
/* Integer path: rewrite codes into the carry-friendly LTU/GEU forms.  */
10509 /* Convert a==0 into (unsigned)a<1. */
10512 if (op1 != const0_rtx)
10515 code = (code == EQ ? LTU : GEU);
10518 /* Convert a>b into b<a or a>=b-1. */
10521 if (GET_CODE (op1) == CONST_INT)
10523 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10524 /* Bail out on overflow. We still can swap operands but that
10525 would force loading of the constant into register. */
10526 if (op1 == const0_rtx
10527 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10529 code = (code == GTU ? GEU : LTU);
10536 code = (code == GTU ? LTU : GEU);
10540 /* Convert a>=0 into (unsigned)a<0x80000000. */
10543 if (mode == DImode || op1 != const0_rtx)
10545 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10546 code = (code == LT ? GEU : LTU);
10550 if (mode == DImode || op1 != constm1_rtx)
10552 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10553 code = (code == LE ? GEU : LTU);
10559 /* Swapping operands may cause constant to appear as first operand. */
10560 if (!nonimmediate_operand (op0, VOIDmode))
10562 if (no_new_pseudos)
10564 op0 = force_reg (mode, op0);
10566 ix86_compare_op0 = op0;
10567 ix86_compare_op1 = op1;
10568 *pop = ix86_expand_compare (code, NULL, NULL);
10569 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10574 ix86_expand_int_movcc (rtx operands[])
10576 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10577 rtx compare_seq, compare_op;
10578 rtx second_test, bypass_test;
10579 enum machine_mode mode = GET_MODE (operands[0]);
10580 bool sign_bit_compare_p = false;;
10583 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10584 compare_seq = get_insns ();
10587 compare_code = GET_CODE (compare_op);
10589 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10590 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10591 sign_bit_compare_p = true;
10593 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10594 HImode insns, we'd be swallowed in word prefix ops. */
10596 if ((mode != HImode || TARGET_FAST_PREFIX)
10597 && (mode != (TARGET_64BIT ? TImode : DImode))
10598 && GET_CODE (operands[2]) == CONST_INT
10599 && GET_CODE (operands[3]) == CONST_INT)
10601 rtx out = operands[0];
10602 HOST_WIDE_INT ct = INTVAL (operands[2]);
10603 HOST_WIDE_INT cf = INTVAL (operands[3]);
10604 HOST_WIDE_INT diff;
10607 /* Sign bit compares are better done using shifts than we do by using
10609 if (sign_bit_compare_p
10610 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10611 ix86_compare_op1, &compare_op))
10613 /* Detect overlap between destination and compare sources. */
10616 if (!sign_bit_compare_p)
10618 bool fpcmp = false;
10620 compare_code = GET_CODE (compare_op);
10622 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10623 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10626 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10629 /* To simplify rest of code, restrict to the GEU case. */
10630 if (compare_code == LTU)
10632 HOST_WIDE_INT tmp = ct;
10635 compare_code = reverse_condition (compare_code);
10636 code = reverse_condition (code);
10641 PUT_CODE (compare_op,
10642 reverse_condition_maybe_unordered
10643 (GET_CODE (compare_op)));
10645 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10649 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10650 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10651 tmp = gen_reg_rtx (mode);
10653 if (mode == DImode)
10654 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10656 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10660 if (code == GT || code == GE)
10661 code = reverse_condition (code);
10664 HOST_WIDE_INT tmp = ct;
10669 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10670 ix86_compare_op1, VOIDmode, 0, -1);
10683 tmp = expand_simple_binop (mode, PLUS,
10685 copy_rtx (tmp), 1, OPTAB_DIRECT);
10696 tmp = expand_simple_binop (mode, IOR,
10698 copy_rtx (tmp), 1, OPTAB_DIRECT);
10700 else if (diff == -1 && ct)
10710 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10712 tmp = expand_simple_binop (mode, PLUS,
10713 copy_rtx (tmp), GEN_INT (cf),
10714 copy_rtx (tmp), 1, OPTAB_DIRECT);
10722 * andl cf - ct, dest
10732 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10735 tmp = expand_simple_binop (mode, AND,
10737 gen_int_mode (cf - ct, mode),
10738 copy_rtx (tmp), 1, OPTAB_DIRECT);
10740 tmp = expand_simple_binop (mode, PLUS,
10741 copy_rtx (tmp), GEN_INT (ct),
10742 copy_rtx (tmp), 1, OPTAB_DIRECT);
10745 if (!rtx_equal_p (tmp, out))
10746 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10748 return 1; /* DONE */
10754 tmp = ct, ct = cf, cf = tmp;
10756 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10758 /* We may be reversing unordered compare to normal compare, that
10759 is not valid in general (we may convert non-trapping condition
10760 to trapping one), however on i386 we currently emit all
10761 comparisons unordered. */
10762 compare_code = reverse_condition_maybe_unordered (compare_code);
10763 code = reverse_condition_maybe_unordered (code);
10767 compare_code = reverse_condition (compare_code);
10768 code = reverse_condition (code);
10772 compare_code = UNKNOWN;
10773 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10774 && GET_CODE (ix86_compare_op1) == CONST_INT)
10776 if (ix86_compare_op1 == const0_rtx
10777 && (code == LT || code == GE))
10778 compare_code = code;
10779 else if (ix86_compare_op1 == constm1_rtx)
10783 else if (code == GT)
10788 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10789 if (compare_code != UNKNOWN
10790 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10791 && (cf == -1 || ct == -1))
10793 /* If lea code below could be used, only optimize
10794 if it results in a 2 insn sequence. */
10796 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10797 || diff == 3 || diff == 5 || diff == 9)
10798 || (compare_code == LT && ct == -1)
10799 || (compare_code == GE && cf == -1))
10802 * notl op1 (if necessary)
10810 code = reverse_condition (code);
10813 out = emit_store_flag (out, code, ix86_compare_op0,
10814 ix86_compare_op1, VOIDmode, 0, -1);
10816 out = expand_simple_binop (mode, IOR,
10818 out, 1, OPTAB_DIRECT);
10819 if (out != operands[0])
10820 emit_move_insn (operands[0], out);
10822 return 1; /* DONE */
10827 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10828 || diff == 3 || diff == 5 || diff == 9)
10829 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10831 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
10837 * lea cf(dest*(ct-cf)),dest
10841 * This also catches the degenerate setcc-only case.
10847 out = emit_store_flag (out, code, ix86_compare_op0,
10848 ix86_compare_op1, VOIDmode, 0, 1);
10851 /* On x86_64 the lea instruction operates on Pmode, so we need
10852 to get arithmetics done in proper mode to match. */
10854 tmp = copy_rtx (out);
10858 out1 = copy_rtx (out);
10859 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10863 tmp = gen_rtx_PLUS (mode, tmp, out1);
10869 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10872 if (!rtx_equal_p (tmp, out))
10875 out = force_operand (tmp, copy_rtx (out));
10877 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10879 if (!rtx_equal_p (out, operands[0]))
10880 emit_move_insn (operands[0], copy_rtx (out));
10882 return 1; /* DONE */
10886 * General case: Jumpful:
10887 * xorl dest,dest cmpl op1, op2
10888 * cmpl op1, op2 movl ct, dest
10889 * setcc dest jcc 1f
10890 * decl dest movl cf, dest
10891 * andl (cf-ct),dest 1:
10894 * Size 20. Size 14.
10896 * This is reasonably steep, but branch mispredict costs are
10897 * high on modern cpus, so consider failing only if optimizing
10901 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10902 && BRANCH_COST >= 2)
10908 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10909 /* We may be reversing unordered compare to normal compare,
10910 that is not valid in general (we may convert non-trapping
10911 condition to trapping one), however on i386 we currently
10912 emit all comparisons unordered. */
10913 code = reverse_condition_maybe_unordered (code);
10916 code = reverse_condition (code);
10917 if (compare_code != UNKNOWN)
10918 compare_code = reverse_condition (compare_code);
10922 if (compare_code != UNKNOWN)
10924 /* notl op1 (if needed)
10929 For x < 0 (resp. x <= -1) there will be no notl,
10930 so if possible swap the constants to get rid of the
10932 True/false will be -1/0 while code below (store flag
10933 followed by decrement) is 0/-1, so the constants need
10934 to be exchanged once more. */
10936 if (compare_code == GE || !cf)
10938 code = reverse_condition (code);
10943 HOST_WIDE_INT tmp = cf;
10948 out = emit_store_flag (out, code, ix86_compare_op0,
10949 ix86_compare_op1, VOIDmode, 0, -1);
10953 out = emit_store_flag (out, code, ix86_compare_op0,
10954 ix86_compare_op1, VOIDmode, 0, 1);
10956 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10957 copy_rtx (out), 1, OPTAB_DIRECT);
10960 out = expand_simple_binop (mode, AND, copy_rtx (out),
10961 gen_int_mode (cf - ct, mode),
10962 copy_rtx (out), 1, OPTAB_DIRECT);
10964 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10965 copy_rtx (out), 1, OPTAB_DIRECT);
10966 if (!rtx_equal_p (out, operands[0]))
10967 emit_move_insn (operands[0], copy_rtx (out));
10969 return 1; /* DONE */
10973 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10975 /* Try a few things more with specific constants and a variable. */
10978 rtx var, orig_out, out, tmp;
10980 if (BRANCH_COST <= 2)
10981 return 0; /* FAIL */
10983 /* If one of the two operands is an interesting constant, load a
10984 constant with the above and mask it in with a logical operation. */
10986 if (GET_CODE (operands[2]) == CONST_INT)
10989 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10990 operands[3] = constm1_rtx, op = and_optab;
10991 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10992 operands[3] = const0_rtx, op = ior_optab;
10994 return 0; /* FAIL */
10996 else if (GET_CODE (operands[3]) == CONST_INT)
10999 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11000 operands[2] = constm1_rtx, op = and_optab;
11001 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11002 operands[2] = const0_rtx, op = ior_optab;
11004 return 0; /* FAIL */
11007 return 0; /* FAIL */
11009 orig_out = operands[0];
11010 tmp = gen_reg_rtx (mode);
11013 /* Recurse to get the constant loaded. */
11014 if (ix86_expand_int_movcc (operands) == 0)
11015 return 0; /* FAIL */
11017 /* Mask in the interesting variable. */
11018 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11020 if (!rtx_equal_p (out, orig_out))
11021 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11023 return 1; /* DONE */
11027 * For comparison with above,
11037 if (! nonimmediate_operand (operands[2], mode))
11038 operands[2] = force_reg (mode, operands[2]);
11039 if (! nonimmediate_operand (operands[3], mode))
11040 operands[3] = force_reg (mode, operands[3]);
11042 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11044 rtx tmp = gen_reg_rtx (mode);
11045 emit_move_insn (tmp, operands[3]);
11048 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11050 rtx tmp = gen_reg_rtx (mode);
11051 emit_move_insn (tmp, operands[2]);
11055 if (! register_operand (operands[2], VOIDmode)
11057 || ! register_operand (operands[3], VOIDmode)))
11058 operands[2] = force_reg (mode, operands[2]);
11061 && ! register_operand (operands[3], VOIDmode))
11062 operands[3] = force_reg (mode, operands[3]);
11064 emit_insn (compare_seq);
11065 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11066 gen_rtx_IF_THEN_ELSE (mode,
11067 compare_op, operands[2],
11070 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11071 gen_rtx_IF_THEN_ELSE (mode,
11073 copy_rtx (operands[3]),
11074 copy_rtx (operands[0]))));
11076 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11077 gen_rtx_IF_THEN_ELSE (mode,
11079 copy_rtx (operands[2]),
11080 copy_rtx (operands[0]))));
11082 return 1; /* DONE */
11085 /* Swap, force into registers, or otherwise massage the two operands
11086 to an sse comparison with a mask result. Thus we differ a bit from
11087 ix86_prepare_fp_compare_args which expects to produce a flags result.
11089 The DEST operand exists to help determine whether to commute commutative
11090 operators. The POP0/POP1 operands are updated in place. The new
11091 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): line-sampled excerpt -- the switch header, case labels
   and returns are elided between the visible lines.  */
11093 static enum rtx_code
11094 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11095 rtx *pop0, rtx *pop1)
11103 /* We have no LTGT as an operator. We could implement it with
11104 NE & ORDERED, but this requires an extra temporary. It's
11105 not clear that it's worth it. */
11112 /* These are supported directly. */
11119 /* For commutative operators, try to canonicalize the destination
11120 operand to be first in the comparison - this helps reload to
11121 avoid extra moves. */
11122 if (!dest || !rtx_equal_p (dest, *pop1))
11130 /* These are not supported directly. Swap the comparison operands
11131 to transform into something that is supported. */
11135 code = swap_condition (code);
11139 gcc_unreachable ();
11145 /* Detect conditional moves that exactly match min/max operational
11146 semantics. Note that this is IEEE safe, as long as we don't
11147 interchange the operands.
11149 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11150 and TRUE if the operation is successful and instructions are emitted. */
/* NOTE(review): line-sampled excerpt -- the initial code-normalization
   switch and several returns are elided between the visible lines.  */
11153 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11154 rtx cmp_op1, rtx if_true, rtx if_false)
11156 enum machine_mode mode;
11162 else if (code == UNGE)
11165 if_true = if_false;
/* The move must select exactly the two compared values.  */
11171 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11173 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11178 mode = GET_MODE (dest);
11180 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11181 but MODE may be a vector mode and thus not appropriate. */
11182 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict mode: wrap in an IEEE-semantics UNSPEC so optimizers cannot
   commute the operands.  */
11184 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11187 if_true = force_reg (mode, if_true);
11188 v = gen_rtvec (2, if_true, if_false);
11189 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Relaxed mode: plain SMIN/SMAX rtx is fine.  */
11193 code = is_min ? SMIN : SMAX;
11194 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11197 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11201 /* Expand an sse vector comparison. Return the register with the result. */
11204 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11205 rtx op_true, rtx op_false)
11207 enum machine_mode mode = GET_MODE (dest);
11210 cmp_op0 = force_reg (mode, cmp_op0);
11211 if (!nonimmediate_operand (cmp_op1, mode))
11212 cmp_op1 = force_reg (mode, cmp_op1);
/* Use a fresh register when DEST overlaps any input, so the mask does
   not clobber values still needed by the caller's select.  */
11215 || reg_overlap_mentioned_p (dest, op_true)
11216 || reg_overlap_mentioned_p (dest, op_false))
11217 dest = gen_reg_rtx (mode);
11219 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11220 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11225 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11226 operations. This is used for both scalar and vector conditional moves. */
/* CMP is an all-ones/all-zeros mask (as produced by ix86_expand_sse_cmp);
   select via (cmp & op_true) | (~cmp & op_false), with single-AND
   shortcuts when one arm is the zero constant.  */
11229 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11231 enum machine_mode mode = GET_MODE (dest);
/* False arm is zero: dest = cmp & op_true.  */
11234 if (op_false == CONST0_RTX (mode))
11236 op_true = force_reg (mode, op_true);
11237 x = gen_rtx_AND (mode, cmp, op_true);
11238 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* True arm is zero: dest = ~cmp & op_false (andn).  */
11240 else if (op_true == CONST0_RTX (mode))
11242 op_false = force_reg (mode, op_false);
11243 x = gen_rtx_NOT (mode, cmp);
11244 x = gen_rtx_AND (mode, x, op_false);
11245 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* General case: blend the two arms through temporaries.  */
11249 op_true = force_reg (mode, op_true);
11250 op_false = force_reg (mode, op_false);
11252 t2 = gen_reg_rtx (mode);
11254 t3 = gen_reg_rtx (mode);
11258 x = gen_rtx_AND (mode, op_true, cmp);
11259 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11261 x = gen_rtx_NOT (mode, cmp);
11262 x = gen_rtx_AND (mode, x, op_false);
11263 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11265 x = gen_rtx_IOR (mode, t3, t2);
11266 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11270 /* Expand a floating-point conditional move. Return true if successful. */
11273 ix86_expand_fp_movcc (rtx operands[])
11275 enum machine_mode mode = GET_MODE (operands[0]);
11276 enum rtx_code code = GET_CODE (operands[1]);
11277 rtx tmp, compare_op, second_test, bypass_test;
/* NOTE(review): this excerpt is elided (returns, braces and some
   conditions are missing); comments cover only the visible lines.  */
/* SSE path: expand via min/max or a comparison-mask + logic sequence,
   since SSE has no cmove instruction.  */
11279 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11281 enum machine_mode cmode;
11283 /* Since we've no cmove for sse registers, don't force bad register
11284 allocation just to gain access to it. Deny movcc when the
11285 comparison mode doesn't match the move mode. */
11286 cmode = GET_MODE (ix86_compare_op0);
11287 if (cmode == VOIDmode)
11288 cmode = GET_MODE (ix86_compare_op1);
/* Massage the comparison into a form the SSE compare patterns accept;
   UNKNOWN means it cannot be done.  */
11292 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11294 &ix86_compare_op1);
11295 if (code == UNKNOWN)
/* Try a direct min/max first; otherwise fall back to the generic
   compare + movcc logic sequence.  */
11298 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11299 ix86_compare_op1, operands[2],
11303 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11304 ix86_compare_op1, operands[2], operands[3]);
11305 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11309 /* The floating point conditional move instructions don't directly
11310 support conditions resulting from a signed integer comparison. */
11312 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11314 /* The floating point conditional move instructions don't directly
11315 support signed integer comparisons. */
/* If fcmov cannot consume this condition, materialize it with setcc
   into a QImode register and re-compare against zero.  */
11317 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11319 gcc_assert (!second_test && !bypass_test);
11320 tmp = gen_reg_rtx (QImode);
11321 ix86_expand_setcc (code, tmp);
11323 ix86_compare_op0 = tmp;
11324 ix86_compare_op1 = const0_rtx;
11325 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy an arm to a scratch register when the extra bypass/second tests
   would otherwise read OPERANDS[0] after it has been overwritten.  */
11327 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11329 tmp = gen_reg_rtx (mode);
11330 emit_move_insn (tmp, operands[3]);
11333 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11335 tmp = gen_reg_rtx (mode);
11336 emit_move_insn (tmp, operands[2]);
/* Emit the primary fcmov, then the optional bypass/second fixup
   cmoves for unordered comparisons.  */
11340 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11341 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11342 operands[2], operands[3])));
11344 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11345 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11346 operands[3], operands[0])));
11348 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11349 gen_rtx_IF_THEN_ELSE (mode, second_test,
11350 operands[2], operands[0])));
11355 /* Expand a floating-point vector conditional move; a vcond operation
11356 rather than a movcc operation. */
11359 ix86_expand_fp_vcond (rtx operands[])
11361 enum rtx_code code = GET_CODE (operands[3]);
/* NOTE(review): elided excerpt -- locals, braces and returns missing.
   Operands: 0 = dest, 1/2 = true/false arms, 3 = comparison, 4/5 = its
   operands (as visible from the calls below).  */
/* Normalize the comparison for the SSE patterns; bail on UNKNOWN.  */
11364 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11365 &operands[4], &operands[5]);
11366 if (code == UNKNOWN)
/* Prefer a direct SSE min/max when the comparison permits it.  */
11369 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11370 operands[5], operands[1], operands[2]))
/* Otherwise build the mask with a compare and select with AND/OR logic.  */
11373 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11374 operands[1], operands[2]);
11375 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11379 /* Expand a signed integral vector conditional move. */
11382 ix86_expand_int_vcond (rtx operands[])
11384 enum machine_mode mode = GET_MODE (operands[0]);
11385 enum rtx_code code = GET_CODE (operands[3]);
11386 bool negate = false;
/* NOTE(review): this excerpt is elided -- the switch over CODE and
   several case labels/braces are missing; comments describe only the
   visible statements.  */
11389 cop0 = operands[4];
11390 cop1 = operands[5];
11392 /* Canonicalize the comparison to EQ, GT, GTU. */
/* The reverse/swap steps below flip the condition (setting NEGATE via
   the elided code, presumably) or exchange the operands so only the
   three hardware-supported codes remain.  */
11403 code = reverse_condition (code);
11409 code = reverse_condition (code);
11415 code = swap_condition (code);
11416 x = cop0, cop0 = cop1, cop1 = x;
11420 gcc_unreachable ();
11423 /* Unsigned parallel compare is not supported by the hardware. Play some
11424 tricks to turn this into a signed comparison against 0. */
11433 /* Perform a parallel modulo subtraction. */
11434 t1 = gen_reg_rtx (mode);
11435 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11437 /* Extract the original sign bit of op0. */
/* NOTE(review): -0x80000000 relies on HOST_WIDE_INT being wider than
   32 bits or on wrap semantics; looks intentional here, but worth
   confirming on 32-bit hosts.  */
11438 mask = GEN_INT (-0x80000000);
11439 mask = gen_rtx_CONST_VECTOR (mode,
11440 gen_rtvec (4, mask, mask, mask, mask));
11441 mask = force_reg (mode, mask);
11442 t2 = gen_reg_rtx (mode);
11443 emit_insn (gen_andv4si3 (t2, cop0, mask));
11445 /* XOR it back into the result of the subtraction. This results
11446 in the sign bit set iff we saw unsigned underflow. */
11447 x = gen_reg_rtx (mode);
11448 emit_insn (gen_xorv4si3 (x, t1, t2));
11456 /* Perform a parallel unsigned saturating subtraction. */
11457 x = gen_reg_rtx (mode);
11458 emit_insn (gen_rtx_SET (VOIDmode, x,
11459 gen_rtx_US_MINUS (mode, cop0, cop1)));
11466 gcc_unreachable ();
/* After the tricks above the comparison is against zero.  */
11470 cop1 = CONST0_RTX (mode);
/* Select between operands[1] and operands[2]; NEGATE swaps the arms
   when the condition was reversed earlier.  */
11473 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11474 operands[1+negate], operands[2-negate]);
11476 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11477 operands[2-negate]);
11481 /* Expand conditional increment or decrement using adb/sbb instructions.
11482 The default case using setcc followed by the conditional move can be
11483 done by generic code. */
11485 ix86_expand_int_addcc (rtx operands[])
11487 enum rtx_code code = GET_CODE (operands[1]);
11489 rtx val = const0_rtx;
11490 bool fpcmp = false;
11491 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): elided excerpt -- returns, braces and some case labels
   are missing; comments cover only the visible statements.  */
/* Only +1/-1 increments are handled here; anything else is left to
   the generic expander.  */
11493 if (operands[3] != const1_rtx
11494 && operands[3] != constm1_rtx)
/* The comparison must be expressible through the carry flag.  */
11496 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11497 ix86_compare_op1, &compare_op))
11499 code = GET_CODE (compare_op);
/* FP comparisons need their code translated to the integer-flag view.  */
11501 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11502 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11505 code = ix86_fp_compare_code_to_integer (code);
/* Reverse the condition in place (with unordered handling for FP).  */
11512 PUT_CODE (compare_op,
11513 reverse_condition_maybe_unordered
11514 (GET_CODE (compare_op)));
11516 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11518 PUT_MODE (compare_op, mode);
11520 /* Construct either adc or sbb insn. */
/* sbb variant: subtract the carry from operand 2.  */
11521 if ((code == LTU) == (operands[3] == constm1_rtx))
11523 switch (GET_MODE (operands[0]))
11526 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11529 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11532 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11535 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11538 gcc_unreachable ();
/* adc variant: add the carry to operand 2.  */
11543 switch (GET_MODE (operands[0]))
11546 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11549 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11552 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11555 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11558 gcc_unreachable ();
11561 return 1; /* DONE */
11565 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11566 works for floating pointer parameters and nonoffsetable memories.
11567 For pushes, it returns just stack offsets; the values will be saved
11568 in the right order. Maximally three parts are generated. */
11571 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* NOTE(review): this excerpt is elided -- the TARGET_64BIT/32-bit
   branch structure, locals and the final return of SIZE are missing;
   comments describe only the visible statements.  */
/* SIZE = number of word-sized parts: 32-bit uses SImode words (XFmode
   is 3 parts), 64-bit uses DImode words.  */
11576 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11578 size = (GET_MODE_SIZE (mode) + 4) / 8;
11580 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11581 gcc_assert (size >= 2 && size <= 3);
11583 /* Optimize constant pool reference to immediates. This is used by fp
11584 moves, that force all constants to memory to allow combining. */
11585 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11587 rtx tmp = maybe_get_pool_constant (operand);
11592 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11594 /* The only non-offsetable memories we handle are pushes. */
11595 int ok = push_operand (operand, VOIDmode);
/* For a push, all parts are the same (pre-dec) stack address.  */
11599 operand = copy_rtx (operand);
11600 PUT_MODE (operand, Pmode);
11601 parts[0] = parts[1] = parts[2] = operand;
11605 if (GET_CODE (operand) == CONST_VECTOR)
11607 enum machine_mode imode = int_mode_for_mode (mode);
11608 /* Caution: if we looked through a constant pool memory above,
11609 the operand may actually have a different mode now. That's
11610 ok, since we want to pun this all the way back to an integer. */
11611 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11612 gcc_assert (operand != NULL);
/* 32-bit target: split DImode directly, otherwise handle REG, MEM and
   CONST_DOUBLE cases into SImode parts.  */
11618 if (mode == DImode)
11619 split_di (&operand, 1, &parts[0], &parts[1]);
11622 if (REG_P (operand))
11624 gcc_assert (reload_completed);
11625 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11626 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11628 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11630 else if (offsettable_memref_p (operand))
11632 operand = adjust_address (operand, SImode, 0);
11633 parts[0] = operand;
11634 parts[1] = adjust_address (operand, SImode, 4);
11636 parts[2] = adjust_address (operand, SImode, 8);
/* FP constant: convert through REAL_VALUE to target-format words.  */
11638 else if (GET_CODE (operand) == CONST_DOUBLE)
11643 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11647 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11648 parts[2] = gen_int_mode (l[2], SImode);
11651 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11654 gcc_unreachable ();
11656 parts[1] = gen_int_mode (l[1], SImode);
11657 parts[0] = gen_int_mode (l[0], SImode);
11660 gcc_unreachable ();
/* 64-bit target: split TImode, and XFmode/TFmode into a DImode low
   part plus an SImode (XF) or DImode (TF) upper part.  */
11665 if (mode == TImode)
11666 split_ti (&operand, 1, &parts[0], &parts[1]);
11667 if (mode == XFmode || mode == TFmode)
11669 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11670 if (REG_P (operand))
11672 gcc_assert (reload_completed);
11673 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11674 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11676 else if (offsettable_memref_p (operand))
11678 operand = adjust_address (operand, DImode, 0);
11679 parts[0] = operand;
11680 parts[1] = adjust_address (operand, upper_mode, 8);
11682 else if (GET_CODE (operand) == CONST_DOUBLE)
11687 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11688 real_to_target (l, &r, mode);
11690 /* Do not use shift by 32 to avoid warning on 32bit systems. */
/* Assemble two 32-bit target words into one HOST_WIDE_INT using two
   31-bit shifts (hence the "<< 31) << 1" idiom).  */
11691 if (HOST_BITS_PER_WIDE_INT >= 64)
11694 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11695 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11698 parts[0] = immed_double_const (l[0], l[1], DImode);
11700 if (upper_mode == SImode)
11701 parts[1] = gen_int_mode (l[2], SImode);
11702 else if (HOST_BITS_PER_WIDE_INT >= 64)
11705 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11706 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11709 parts[1] = immed_double_const (l[2], l[3], DImode);
11712 gcc_unreachable ();
11719 /* Emit insns to perform a move or push of DI, DF, and XF values.
11720 Return false when normal moves are needed; true when all required
11721 insns have been emitted. Operands 2-4 contain the input values
11722 int the correct order; operands 5-7 contain the output values. */
11725 ix86_split_long_move (rtx operands[])
11730 int collisions = 0;
11731 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): elided excerpt -- locals (nparts, part[][], push),
   braces, returns and several conditions are missing; comments cover
   only the visible statements.  */
11733 /* The DFmode expanders may ask us to move double.
11734 For 64bit target this is single move. By hiding the fact
11735 here we simplify i386.md splitters. */
11736 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11738 /* Optimize constant pool reference to immediates. This is used by
11739 fp moves, that force all constants to memory to allow combining. */
11741 if (GET_CODE (operands[1]) == MEM
11742 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11743 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11744 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11745 if (push_operand (operands[0], VOIDmode))
11747 operands[0] = copy_rtx (operands[0]);
11748 PUT_MODE (operands[0], Pmode);
/* 8-byte value on 64-bit: punt to a single DImode move.  */
11751 operands[0] = gen_lowpart (DImode, operands[0]);
11752 operands[1] = gen_lowpart (DImode, operands[1]);
11753 emit_move_insn (operands[0], operands[1]);
11757 /* The only non-offsettable memory we handle is push. */
11758 if (push_operand (operands[0], VOIDmode))
11761 gcc_assert (GET_CODE (operands[0]) != MEM
11762 || offsettable_memref_p (operands[0]));
/* Split destination and source into word-sized parts.  */
11764 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11765 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11767 /* When emitting push, take care for source operands on the stack. */
11768 if (push && GET_CODE (operands[1]) == MEM
11769 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
/* Rewrite the lower source parts to address through the upper part,
   compensating for the stack pointer moving during the pushes.  */
11772 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11773 XEXP (part[1][2], 0));
11774 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11775 XEXP (part[1][1], 0));
11778 /* We need to do copy in the right order in case an address register
11779 of the source overlaps the destination. */
11780 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
/* Count destination registers mentioned in the source address.  */
11782 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
11784 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11787 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
11790 /* Collision in the middle part can be handled by reordering. */
11791 if (collisions == 1 && nparts == 3
11792 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11795 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
11796 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
11799 /* If there are more collisions, we can't handle it by reordering.
11800 Do an lea to the last part and use only one colliding move. */
11801 else if (collisions > 1)
11807 base = part[0][nparts - 1];
11809 /* Handle the case when the last part isn't valid for lea.
11810 Happens in 64-bit mode storing the 12-byte XFmode. */
11811 if (GET_MODE (base) != Pmode)
11812 base = gen_rtx_REG (Pmode, REGNO (base));
11814 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
11815 part[1][0] = replace_equiv_address (part[1][0], base);
11816 part[1][1] = replace_equiv_address (part[1][1],
11817 plus_constant (base, UNITS_PER_WORD));
11819 part[1][2] = replace_equiv_address (part[1][2],
11820 plus_constant (base, 8));
/* Push path: XFmode is padded to 16 bytes, so pre-adjust %esp by -4.  */
11830 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
11831 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
11832 emit_move_insn (part[0][2], part[1][2]);
11837 /* In 64bit mode we don't have 32bit push available. In case this is
11838 register, it is OK - we will just use larger counterpart. We also
11839 retype memory - these comes from attempt to avoid REX prefix on
11840 moving of second half of TFmode value. */
11841 if (GET_MODE (part[1][1]) == SImode)
11843 switch (GET_CODE (part[1][1]))
11846 part[1][1] = adjust_address (part[1][1], DImode, 0);
11850 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
11854 gcc_unreachable ();
11857 if (GET_MODE (part[1][0]) == SImode)
11858 part[1][0] = part[1][1];
/* Emit pushes high-to-low so values land at the right stack offsets.  */
11861 emit_move_insn (part[0][1], part[1][1]);
11862 emit_move_insn (part[0][0], part[1][0]);
11866 /* Choose correct order to not overwrite the source before it is copied. */
11867 if ((REG_P (part[0][0])
11868 && REG_P (part[1][1])
11869 && (REGNO (part[0][0]) == REGNO (part[1][1])
11871 && REGNO (part[0][0]) == REGNO (part[1][2]))))
11873 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* High-to-low ordering (3-part and 2-part variants).  */
11877 operands[2] = part[0][2];
11878 operands[3] = part[0][1];
11879 operands[4] = part[0][0];
11880 operands[5] = part[1][2];
11881 operands[6] = part[1][1];
11882 operands[7] = part[1][0];
11886 operands[2] = part[0][1];
11887 operands[3] = part[0][0];
11888 operands[5] = part[1][1];
11889 operands[6] = part[1][0];
/* Low-to-high ordering (3-part and 2-part variants).  */
11896 operands[2] = part[0][0];
11897 operands[3] = part[0][1];
11898 operands[4] = part[0][2];
11899 operands[5] = part[1][0];
11900 operands[6] = part[1][1];
11901 operands[7] = part[1][2];
11905 operands[2] = part[0][0];
11906 operands[3] = part[0][1];
11907 operands[5] = part[1][0];
11908 operands[6] = part[1][1];
11912 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
/* Reuse an already-loaded constant register for equal later constants.  */
11915 if (GET_CODE (operands[5]) == CONST_INT
11916 && operands[5] != const0_rtx
11917 && REG_P (operands[2]))
11919 if (GET_CODE (operands[6]) == CONST_INT
11920 && INTVAL (operands[6]) == INTVAL (operands[5]))
11921 operands[6] = operands[2];
11924 && GET_CODE (operands[7]) == CONST_INT
11925 && INTVAL (operands[7]) == INTVAL (operands[5]))
11926 operands[7] = operands[2];
11930 && GET_CODE (operands[6]) == CONST_INT
11931 && operands[6] != const0_rtx
11932 && REG_P (operands[3])
11933 && GET_CODE (operands[7]) == CONST_INT
11934 && INTVAL (operands[7]) == INTVAL (operands[6]))
11935 operands[7] = operands[3];
/* Finally emit the part-wise moves in the chosen order.  */
11938 emit_move_insn (operands[2], operands[5]);
11939 emit_move_insn (operands[3], operands[6]);
11941 emit_move_insn (operands[4], operands[7]);
11946 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
11947 left shift by a constant, either using a single shift or
11948 a sequence of add instructions. */
11951 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* NOTE(review): elided excerpt -- the count==1 condition and the
   SImode arms of the gen_* selections appear to be missing (only the
   gen_adddi3/gen_ashldi3 alternative is visible).  */
/* COUNT == 1 (presumably): a single add doubles the operand.  */
11955 emit_insn ((mode == DImode
11957 : gen_adddi3) (operand, operand, operand));
/* Small counts: a chain of adds can be cheaper than one shift.  */
11959 else if (!optimize_size
11960 && count * ix86_cost->add <= ix86_cost->shift_const)
11963 for (i=0; i<count; i++)
11965 emit_insn ((mode == DImode
11967 : gen_adddi3) (operand, operand, operand));
/* Otherwise emit one immediate shift.  */
11971 emit_insn ((mode == DImode
11973 : gen_ashldi3) (operand, operand, GEN_INT (count)));
11977 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
11979 rtx low[2], high[2];
11981 const int single_width = mode == DImode ? 32 : 64;
/* NOTE(review): elided excerpt -- braces, returns and some gen_*
   selection arms are missing; comments cover only the visible lines.
   Splits a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into single-word operations.  */
/* Constant shift count: fully resolve the split at expand time.  */
11983 if (GET_CODE (operands[2]) == CONST_INT)
11985 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11986 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift by a full word or more: low word becomes zero, high word is
   the old low word shifted by the remainder.  */
11988 if (count >= single_width)
11990 emit_move_insn (high[0], low[1]);
11991 emit_move_insn (low[0], const0_rtx);
11993 if (count > single_width)
11994 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Shift by less than a word: shld for the high word, plain shift
   (or add chain) for the low word.  */
11998 if (!rtx_equal_p (operands[0], operands[1]))
11999 emit_move_insn (operands[0], operands[1]);
12000 emit_insn ((mode == DImode
12002 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12003 ix86_expand_ashl_const (low[0], count, mode);
/* Variable shift count from here on.  */
12008 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
/* 1 << N special case: set exactly one bit without branches.  */
12010 if (operands[1] == const1_rtx)
12012 /* Assuming we've chosen a QImode capable registers, then 1 << N
12013 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12014 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12016 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Test bit log2(single_width) of the count and setcc 0/1 into the
   low/high bytes accordingly.  */
12018 ix86_expand_clear (low[0]);
12019 ix86_expand_clear (high[0]);
12020 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12022 d = gen_lowpart (QImode, low[0]);
12023 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12024 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12025 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12027 d = gen_lowpart (QImode, high[0]);
12028 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12029 s = gen_rtx_NE (QImode, flags, const0_rtx);
12030 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12033 /* Otherwise, we can get the same results by manually performing
12034 a bit extract operation on bit 5/6, and then performing the two
12035 shifts. The two methods of getting 0/1 into low/high are exactly
12036 the same size. Avoiding the shift in the bit extract case helps
12037 pentium4 a bit; no one else seems to care much either way. */
12042 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12043 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12045 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12046 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) / 6 (TImode) of the count into high[0],
   then low[0] = !high[0].  */
12048 emit_insn ((mode == DImode
12050 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12051 emit_insn ((mode == DImode
12053 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12054 emit_move_insn (low[0], high[0]);
12055 emit_insn ((mode == DImode
12057 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
/* Shift both single-bit words by the (masked) count.  */
12060 emit_insn ((mode == DImode
12062 : gen_ashldi3) (low[0], low[0], operands[2]));
12063 emit_insn ((mode == DImode
12065 : gen_ashldi3) (high[0], high[0], operands[2]));
/* -1 << N special case: shld is unnecessary since we shift ones
   into a word that is already all-ones.  */
12069 if (operands[1] == constm1_rtx)
12071 /* For -1 << N, we can avoid the shld instruction, because we
12072 know that we're shifting 0...31/63 ones into a -1. */
12073 emit_move_insn (low[0], constm1_rtx)\u003b
12075 emit_move_insn (high[0], low[0]);
12077 emit_move_insn (high[0], constm1_rtx);
/* General variable shift: shld + shl, then fix up when the count is
   >= single_width (cmove when available, branchy helper otherwise).  */
12081 if (!rtx_equal_p (operands[0], operands[1]))
12082 emit_move_insn (operands[0], operands[1]);
12084 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12085 emit_insn ((mode == DImode
12087 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12090 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12092 if (TARGET_CMOVE && scratch)
12094 ix86_expand_clear (scratch);
12095 emit_insn ((mode == DImode
12096 ? gen_x86_shift_adj_1
12097 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12100 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12104 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12106 rtx low[2], high[2];
12108 const int single_width = mode == DImode ? 32 : 64;
/* NOTE(review): elided excerpt -- braces, returns and some gen_*
   selection arms are missing.  Splits a double-word arithmetic right
   shift into single-word operations.  */
/* Constant count: resolve the split at expand time.  */
12110 if (GET_CODE (operands[2]) == CONST_INT)
12112 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12113 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximum count: both words become the sign-extension of the input.  */
12115 if (count == single_width * 2 - 1)
12117 emit_move_insn (high[0], high[1]);
12118 emit_insn ((mode == DImode
12120 : gen_ashrdi3) (high[0], high[0],
12121 GEN_INT (single_width - 1)));
12122 emit_move_insn (low[0], high[0]);
/* Count >= one word: low word is old high word shifted by the
   remainder; high word is the sign fill.  */
12125 else if (count >= single_width)
12127 emit_move_insn (low[0], high[1]);
12128 emit_move_insn (high[0], low[0]);
12129 emit_insn ((mode == DImode
12131 : gen_ashrdi3) (high[0], high[0],
12132 GEN_INT (single_width - 1)));
12133 if (count > single_width)
12134 emit_insn ((mode == DImode
12136 : gen_ashrdi3) (low[0], low[0],
12137 GEN_INT (count - single_width)));
/* Count < one word: shrd for the low word, sar for the high word.  */
12141 if (!rtx_equal_p (operands[0], operands[1]))
12142 emit_move_insn (operands[0], operands[1]);
12143 emit_insn ((mode == DImode
12145 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12146 emit_insn ((mode == DImode
12148 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then fix up counts >= single_width.  */
12153 if (!rtx_equal_p (operands[0], operands[1]))
12154 emit_move_insn (operands[0], operands[1]);
12156 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12158 emit_insn ((mode == DImode
12160 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12161 emit_insn ((mode == DImode
12163 : gen_ashrdi3) (high[0], high[0], operands[2]));
/* With cmove, precompute the sign word in SCRATCH for the fixup;
   otherwise use the branching adjustment helper.  */
12165 if (TARGET_CMOVE && scratch)
12167 emit_move_insn (scratch, high[0]);
12168 emit_insn ((mode == DImode
12170 : gen_ashrdi3) (scratch, scratch,
12171 GEN_INT (single_width - 1)));
12172 emit_insn ((mode == DImode
12173 ? gen_x86_shift_adj_1
12174 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12178 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12183 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12185 rtx low[2], high[2];
12187 const int single_width = mode == DImode ? 32 : 64;
/* NOTE(review): elided excerpt -- braces, returns and some gen_*
   selection arms are missing.  Splits a double-word logical right
   shift into single-word operations.  */
/* Constant count: resolve the split at expand time.  */
12189 if (GET_CODE (operands[2]) == CONST_INT)
12191 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12192 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= one word: low word is old high word shifted by the
   remainder; high word becomes zero.  */
12194 if (count >= single_width)
12196 emit_move_insn (low[0], high[1]);
12197 ix86_expand_clear (high[0]);
12199 if (count > single_width)
12200 emit_insn ((mode == DImode
12202 : gen_lshrdi3) (low[0], low[0],
12203 GEN_INT (count - single_width)));
/* Count < one word: shrd for the low word, shr for the high word.  */
12207 if (!rtx_equal_p (operands[0], operands[1]))
12208 emit_move_insn (operands[0], operands[1]);
12209 emit_insn ((mode == DImode
12211 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12212 emit_insn ((mode == DImode
12214 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + shr, then fix up counts >= single_width.  */
12219 if (!rtx_equal_p (operands[0], operands[1]))
12220 emit_move_insn (operands[0], operands[1]);
12222 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12224 emit_insn ((mode == DImode
12226 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12227 emit_insn ((mode == DImode
12229 : gen_lshrdi3) (high[0], high[0], operands[2]));
12231 /* Heh. By reversing the arguments, we can reuse this pattern. */
12232 if (TARGET_CMOVE && scratch)
12234 ix86_expand_clear (scratch);
12235 emit_insn ((mode == DImode
12236 ? gen_x86_shift_adj_1
12237 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12241 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12245 /* Helper function for the string operations below. Dest VARIABLE whether
12246 it is aligned to VALUE bytes. If true, jump to the label. */
12248 ix86_expand_aligntest (rtx variable, int value)
12250 rtx label = gen_label_rtx ();
12251 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* NOTE(review): the return-type line, braces and the final return of
   LABEL appear elided in this excerpt.  */
/* AND the variable with VALUE in its own mode and branch to LABEL
   when the tested bits are zero (i.e. the address is aligned).  */
12252 if (GET_MODE (variable) == DImode)
12253 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12255 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12256 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12261 /* Adjust COUNTER by the VALUE. */
12263 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
/* Subtract VALUE from COUNTREG (add the negation) in the counter's
   own mode.  NOTE(review): braces/else appear elided in this excerpt.  */
12265 if (GET_MODE (countreg) == DImode)
12266 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12268 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12271 /* Zero extend possibly SImode EXP to Pmode register. */
12273 ix86_zero_extend_to_Pmode (rtx exp)
/* NOTE(review): the declaration of R and the final return appear
   elided in this excerpt.  */
/* VOIDmode (a constant): just force it into a Pmode register.  */
12276 if (GET_MODE (exp) == VOIDmode)
12277 return force_reg (Pmode, exp);
/* Already Pmode: copy to a fresh register.  */
12278 if (GET_MODE (exp) == Pmode)
12279 return copy_to_mode_reg (Pmode, exp);
/* Otherwise zero-extend SImode EXP into a new DImode/Pmode register.  */
12280 r = gen_reg_rtx (Pmode);
12281 emit_insn (gen_zero_extendsidi2 (r, exp));
12285 /* Expand string move (memcpy) operation. Use i386 string operations when
12286 profitable. expand_clrmem contains similar code. */
12288 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12290 rtx srcreg, destreg, countreg, srcexp, destexp;
12291 enum machine_mode counter_mode;
12292 HOST_WIDE_INT align = 0;
12293 unsigned HOST_WIDE_INT count = 0;
12295 if (GET_CODE (align_exp) == CONST_INT)
12296 align = INTVAL (align_exp);
12298 /* Can't use any of this if the user has appropriated esi or edi. */
12299 if (global_regs[4] || global_regs[5])
12302 /* This simple hack avoids all inlining code and simplifies code below. */
12303 if (!TARGET_ALIGN_STRINGOPS)
12306 if (GET_CODE (count_exp) == CONST_INT)
12308 count = INTVAL (count_exp);
12309 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12313 /* Figure out proper mode for counter. For 32bits it is always SImode,
12314 for 64bits use SImode when possible, otherwise DImode.
12315 Set count to number of bytes copied when known at compile time. */
12317 || GET_MODE (count_exp) == SImode
12318 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12319 counter_mode = SImode;
12321 counter_mode = DImode;
12323 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12325 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12326 if (destreg != XEXP (dst, 0))
12327 dst = replace_equiv_address_nv (dst, destreg);
12328 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12329 if (srcreg != XEXP (src, 0))
12330 src = replace_equiv_address_nv (src, srcreg);
12332 /* When optimizing for size emit simple rep ; movsb instruction for
12333 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12334 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12335 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12336 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12337 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12338 known to be zero or not. The rep; movsb sequence causes higher
12339 register pressure though, so take that into account. */
12341 if ((!optimize || optimize_size)
12346 || (count & 3) + count / 4 > 6))))
12348 emit_insn (gen_cld ());
12349 countreg = ix86_zero_extend_to_Pmode (count_exp);
12350 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12351 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12352 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12356 /* For constant aligned (or small unaligned) copies use rep movsl
12357 followed by code copying the rest. For PentiumPro ensure 8 byte
12358 alignment to allow rep movsl acceleration. */
12360 else if (count != 0
12362 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12363 || optimize_size || count < (unsigned int) 64))
12365 unsigned HOST_WIDE_INT offset = 0;
12366 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12367 rtx srcmem, dstmem;
12369 emit_insn (gen_cld ());
12370 if (count & ~(size - 1))
12372 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12374 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12376 while (offset < (count & ~(size - 1)))
12378 srcmem = adjust_automodify_address_nv (src, movs_mode,
12380 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12382 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12388 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12389 & (TARGET_64BIT ? -1 : 0x3fffffff));
12390 countreg = copy_to_mode_reg (counter_mode, countreg);
12391 countreg = ix86_zero_extend_to_Pmode (countreg);
12393 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12394 GEN_INT (size == 4 ? 2 : 3));
12395 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12396 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12398 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12399 countreg, destexp, srcexp));
12400 offset = count & ~(size - 1);
12403 if (size == 8 && (count & 0x04))
12405 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12407 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12409 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12414 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12416 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12418 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12423 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12425 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12427 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12430 /* The generic code based on the glibc implementation:
12431 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12432 allowing accelerated copying there)
12433 - copy the data using rep movsl
12434 - copy the rest. */
12439 rtx srcmem, dstmem;
12440 int desired_alignment = (TARGET_PENTIUMPRO
12441 && (count == 0 || count >= (unsigned int) 260)
12442 ? 8 : UNITS_PER_WORD);
12443 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12444 dst = change_address (dst, BLKmode, destreg);
12445 src = change_address (src, BLKmode, srcreg);
12447 /* In case we don't know anything about the alignment, default to
12448 library version, since it is usually equally fast and result in
12451 Also emit call when we know that the count is large and call overhead
12452 will not be important. */
12453 if (!TARGET_INLINE_ALL_STRINGOPS
12454 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12457 if (TARGET_SINGLE_STRINGOP)
12458 emit_insn (gen_cld ());
12460 countreg2 = gen_reg_rtx (Pmode);
12461 countreg = copy_to_mode_reg (counter_mode, count_exp);
12463 /* We don't use loops to align destination and to copy parts smaller
12464 than 4 bytes, because gcc is able to optimize such code better (in
12465 the case the destination or the count really is aligned, gcc is often
12466 able to predict the branches) and also it is friendlier to the
12467 hardware branch prediction.
12469 Using loops is beneficial for generic case, because we can
12470 handle small counts using the loops. Many CPUs (such as Athlon)
12471 have large REP prefix setup costs.
12473 This is quite costly. Maybe we can revisit this decision later or
12474 add some customizability to this code. */
12476 if (count == 0 && align < desired_alignment)
12478 label = gen_label_rtx ();
12479 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12480 LEU, 0, counter_mode, 1, label);
12484 rtx label = ix86_expand_aligntest (destreg, 1);
12485 srcmem = change_address (src, QImode, srcreg);
12486 dstmem = change_address (dst, QImode, destreg);
12487 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12488 ix86_adjust_counter (countreg, 1);
12489 emit_label (label);
12490 LABEL_NUSES (label) = 1;
12494 rtx label = ix86_expand_aligntest (destreg, 2);
12495 srcmem = change_address (src, HImode, srcreg);
12496 dstmem = change_address (dst, HImode, destreg);
12497 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12498 ix86_adjust_counter (countreg, 2);
12499 emit_label (label);
12500 LABEL_NUSES (label) = 1;
12502 if (align <= 4 && desired_alignment > 4)
12504 rtx label = ix86_expand_aligntest (destreg, 4);
12505 srcmem = change_address (src, SImode, srcreg);
12506 dstmem = change_address (dst, SImode, destreg);
12507 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12508 ix86_adjust_counter (countreg, 4);
12509 emit_label (label);
12510 LABEL_NUSES (label) = 1;
12513 if (label && desired_alignment > 4 && !TARGET_64BIT)
12515 emit_label (label);
12516 LABEL_NUSES (label) = 1;
12519 if (!TARGET_SINGLE_STRINGOP)
12520 emit_insn (gen_cld ());
12523 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12525 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12529 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12530 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12532 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12533 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12534 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12535 countreg2, destexp, srcexp));
12539 emit_label (label);
12540 LABEL_NUSES (label) = 1;
12542 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12544 srcmem = change_address (src, SImode, srcreg);
12545 dstmem = change_address (dst, SImode, destreg);
12546 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12548 if ((align <= 4 || count == 0) && TARGET_64BIT)
12550 rtx label = ix86_expand_aligntest (countreg, 4);
12551 srcmem = change_address (src, SImode, srcreg);
12552 dstmem = change_address (dst, SImode, destreg);
12553 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12554 emit_label (label);
12555 LABEL_NUSES (label) = 1;
12557 if (align > 2 && count != 0 && (count & 2))
12559 srcmem = change_address (src, HImode, srcreg);
12560 dstmem = change_address (dst, HImode, destreg);
12561 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12563 if (align <= 2 || count == 0)
12565 rtx label = ix86_expand_aligntest (countreg, 2);
12566 srcmem = change_address (src, HImode, srcreg);
12567 dstmem = change_address (dst, HImode, destreg);
12568 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12569 emit_label (label);
12570 LABEL_NUSES (label) = 1;
12572 if (align > 1 && count != 0 && (count & 1))
12574 srcmem = change_address (src, QImode, srcreg);
12575 dstmem = change_address (dst, QImode, destreg);
12576 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12578 if (align <= 1 || count == 0)
12580 rtx label = ix86_expand_aligntest (countreg, 1);
12581 srcmem = change_address (src, QImode, srcreg);
12582 dstmem = change_address (dst, QImode, destreg);
12583 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12584 emit_label (label);
12585 LABEL_NUSES (label) = 1;
12592 /* Expand string clear operation (bzero). Use i386 string operations when
12593 profitable. expand_movmem contains similar code. */
/* NOTE(review): the embedded upstream line numbers in this extract are
   non-contiguous, so return statements, braces and else-arms are missing
   from this view -- verify against upstream gcc/config/i386/i386.c before
   editing.
   Parameters: DST is the destination BLKmode MEM; COUNT_EXP is the byte
   count rtx (CONST_INT when known at compile time); ALIGN_EXP is the known
   alignment rtx.  Returns nonzero on success per the movmem expander
   convention -- TODO confirm, the returns are not visible here.  */
12595 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12597 rtx destreg, zeroreg, countreg, destexp;
12598 enum machine_mode counter_mode;
12599 HOST_WIDE_INT align = 0;
12600 unsigned HOST_WIDE_INT count = 0;
12602 if (GET_CODE (align_exp) == CONST_INT)
12603 align = INTVAL (align_exp);
12605 /* Can't use any of this if the user has appropriated esi. */
12606 if (global_regs[4])
12609 /* This simple hack avoids all inlining code and simplifies code below. */
12610 if (!TARGET_ALIGN_STRINGOPS)
12613 if (GET_CODE (count_exp) == CONST_INT)
12615 count = INTVAL (count_exp);
12616 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12619 /* Figure out proper mode for counter. For 32bits it is always SImode,
12620 for 64bits use SImode when possible, otherwise DImode.
12621 Set count to number of bytes copied when known at compile time. */
12623 || GET_MODE (count_exp) == SImode
12624 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12625 counter_mode = SImode;
12627 counter_mode = DImode;
12629 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12630 if (destreg != XEXP (dst, 0))
12631 dst = replace_equiv_address_nv (dst, destreg);
12634 /* When optimizing for size emit simple rep ; movsb instruction for
12635 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12636 sequence is 7 bytes long, so if optimizing for size and count is
12637 small enough that some stosl, stosw and stosb instructions without
12638 rep are shorter, fall back into the next if. */
12640 if ((!optimize || optimize_size)
12643 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
/* Branch 1: plain "rep stosb" -- shortest encoding, used for -Os or -O0.  */
12645 emit_insn (gen_cld ());
12647 countreg = ix86_zero_extend_to_Pmode (count_exp);
12648 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12649 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12650 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
/* Branch 2: count known at compile time -- emit stos{q,l} for the bulk
   (straight-line when the repeat count is small, rep stos otherwise),
   then at most one SImode/HImode/QImode store for the remainder.  */
12652 else if (count != 0
12654 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12655 || optimize_size || count < (unsigned int) 64))
12657 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12658 unsigned HOST_WIDE_INT offset = 0;
12660 emit_insn (gen_cld ());
12662 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12663 if (count & ~(size - 1))
12665 unsigned HOST_WIDE_INT repcount;
12666 unsigned int max_nonrep;
12668 repcount = count >> (size == 4 ? 2 : 3);
12670 repcount &= 0x3fffffff;
12672 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12673 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12674 bytes. In both cases the latter seems to be faster for small
12676 max_nonrep = size == 4 ? 7 : 4;
12677 if (!optimize_size)
/* switch on ix86_tune -- the switch head is among the missing lines.  */
12680 case PROCESSOR_PENTIUM4:
12681 case PROCESSOR_NOCONA:
12688 if (repcount <= max_nonrep)
12689 while (repcount-- > 0)
12691 rtx mem = adjust_automodify_address_nv (dst,
12692 GET_MODE (zeroreg),
12694 emit_insn (gen_strset (destreg, mem, zeroreg));
12699 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12700 countreg = ix86_zero_extend_to_Pmode (countreg);
12701 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12702 GEN_INT (size == 4 ? 2 : 3));
12703 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12704 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12706 offset = count & ~(size - 1);
/* Remainder stores: 4, then 2, then 1 byte, taken from low subregs of
   ZEROREG.  */
12709 if (size == 8 && (count & 0x04))
12711 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12713 emit_insn (gen_strset (destreg, mem,
12714 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12719 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12721 emit_insn (gen_strset (destreg, mem,
12722 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12727 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12729 emit_insn (gen_strset (destreg, mem,
12730 gen_rtx_SUBREG (QImode, zeroreg, 0)));
/* Branch 3 (generic): align the destination with single stores, clear the
   bulk with rep stos{q,l}, then handle the tail -- mirrors the movmem
   expander above.  */
12737 /* Compute desired alignment of the string operation. */
12738 int desired_alignment = (TARGET_PENTIUMPRO
12739 && (count == 0 || count >= (unsigned int) 260)
12740 ? 8 : UNITS_PER_WORD);
12742 /* In case we don't know anything about the alignment, default to
12743 library version, since it is usually equally fast and result in
12746 Also emit call when we know that the count is large and call overhead
12747 will not be important. */
12748 if (!TARGET_INLINE_ALL_STRINGOPS
12749 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12752 if (TARGET_SINGLE_STRINGOP)
12753 emit_insn (gen_cld ());
12755 countreg2 = gen_reg_rtx (Pmode);
12756 countreg = copy_to_mode_reg (counter_mode, count_exp);
12757 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12758 /* Get rid of MEM_OFFSET, it won't be accurate. */
12759 dst = change_address (dst, BLKmode, destreg);
12761 if (count == 0 && align < desired_alignment)
12763 label = gen_label_rtx ();
12764 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12765 LEU, 0, counter_mode, 1, label);
12769 rtx label = ix86_expand_aligntest (destreg, 1);
12770 emit_insn (gen_strset (destreg, dst,
12771 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12772 ix86_adjust_counter (countreg, 1);
12773 emit_label (label);
12774 LABEL_NUSES (label) = 1;
12778 rtx label = ix86_expand_aligntest (destreg, 2);
12779 emit_insn (gen_strset (destreg, dst,
12780 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12781 ix86_adjust_counter (countreg, 2);
12782 emit_label (label);
12783 LABEL_NUSES (label) = 1;
12785 if (align <= 4 && desired_alignment > 4)
12787 rtx label = ix86_expand_aligntest (destreg, 4);
12788 emit_insn (gen_strset (destreg, dst,
12790 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
12792 ix86_adjust_counter (countreg, 4);
12793 emit_label (label);
12794 LABEL_NUSES (label) = 1;
12797 if (label && desired_alignment > 4 && !TARGET_64BIT)
12799 emit_label (label);
12800 LABEL_NUSES (label) = 1;
12804 if (!TARGET_SINGLE_STRINGOP)
12805 emit_insn (gen_cld ());
/* 64-bit: clear 8 bytes per iteration (count >> 3); 32-bit: 4 bytes.  */
12808 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12810 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12814 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12815 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12817 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12818 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
12822 emit_label (label);
12823 LABEL_NUSES (label) = 1;
/* Tail: clear leftover 4/2/1 bytes, branching on countreg bits when the
   count is not a compile-time constant.  */
12826 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12827 emit_insn (gen_strset (destreg, dst,
12828 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12829 if (TARGET_64BIT && (align <= 4 || count == 0))
12831 rtx label = ix86_expand_aligntest (countreg, 4);
12832 emit_insn (gen_strset (destreg, dst,
12833 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12834 emit_label (label);
12835 LABEL_NUSES (label) = 1;
12837 if (align > 2 && count != 0 && (count & 2))
12838 emit_insn (gen_strset (destreg, dst,
12839 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12840 if (align <= 2 || count == 0)
12842 rtx label = ix86_expand_aligntest (countreg, 2);
12843 emit_insn (gen_strset (destreg, dst,
12844 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12845 emit_label (label);
12846 LABEL_NUSES (label) = 1;
12848 if (align > 1 && count != 0 && (count & 1))
12849 emit_insn (gen_strset (destreg, dst,
12850 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12851 if (align <= 1 || count == 0)
12853 rtx label = ix86_expand_aligntest (countreg, 1);
12854 emit_insn (gen_strset (destreg, dst,
12855 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12856 emit_label (label);
12857 LABEL_NUSES (label) = 1;
12863 /* Expand strlen. */
/* NOTE(review): this extract has non-contiguous upstream line numbers, so
   braces and returns are missing from view; verify against upstream
   i386.c.
   OUT receives the length; SRC is the string MEM; EOSCHAR is the
   terminator rtx (const0_rtx for ordinary strlen); ALIGN is the known
   alignment rtx.  Two strategies are visible: an unrolled word-at-a-time
   scan via ix86_expand_strlensi_unroll_1, and a repnz scasb sequence
   (UNSPEC_SCAS) whose result is post-processed with not/add -1.  */
12865 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
12867 rtx addr, scratch1, scratch2, scratch3, scratch4;
12869 /* The generic case of strlen expander is long. Avoid it's
12870 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
12872 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12873 && !TARGET_INLINE_ALL_STRINGOPS
12875 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
12878 addr = force_reg (Pmode, XEXP (src, 0));
12879 scratch1 = gen_reg_rtx (Pmode);
12881 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12884 /* Well it seems that some optimizer does not combine a call like
12885 foo(strlen(bar), strlen(bar));
12886 when the move and the subtraction is done here. It does calculate
12887 the length just once when these instructions are done inside of
12888 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
12889 often used and I use one fewer register for the lifetime of
12890 output_strlen_unroll() this is better. */
12892 emit_move_insn (out, addr);
12894 ix86_expand_strlensi_unroll_1 (out, src, align);
12896 /* strlensi_unroll_1 returns the address of the zero at the end of
12897 the string, like memchr(), so compute the length by subtracting
12898 the start address. */
12900 emit_insn (gen_subdi3 (out, out, addr));
12902 emit_insn (gen_subsi3 (out, out, addr));
/* scasb strategy: scratch3 holds the walking address, scratch4 the -1
   count, scratch1 receives the post-scan address.  */
12907 scratch2 = gen_reg_rtx (Pmode);
12908 scratch3 = gen_reg_rtx (Pmode);
12909 scratch4 = force_reg (Pmode, constm1_rtx);
12911 emit_move_insn (scratch3, addr);
12912 eoschar = force_reg (QImode, eoschar);
12914 emit_insn (gen_cld ());
12915 src = replace_equiv_address_nv (src, scratch3);
12917 /* If .md starts supporting :P, this can be done in .md. */
12918 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
12919 scratch4), UNSPEC_SCAS);
12920 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* length = ~scratch1 - 1 (DImode on 64-bit, SImode otherwise).  */
12923 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
12924 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
12928 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
12929 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
12935 /* Expand the appropriate insns for doing strlen if not just doing
12938 out = result, initialized with the start address
12939 align_rtx = alignment of the address.
12940 scratch = scratch register, initialized with the startaddress when
12941 not aligned, otherwise undefined
12943 This is just the body. It needs the initializations mentioned above and
12944 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): non-contiguous upstream line numbers -- braces/else-arms
   are missing from this view; verify against upstream i386.c.  On exit OUT
   holds the address of the terminating zero byte (memchr-style), per the
   caller's comment in ix86_expand_strlen.  */
12947 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
12951 rtx align_2_label = NULL_RTX;
12952 rtx align_3_label = NULL_RTX;
12953 rtx align_4_label = gen_label_rtx ();
12954 rtx end_0_label = gen_label_rtx ();
12956 rtx tmpreg = gen_reg_rtx (SImode);
12957 rtx scratch = gen_reg_rtx (SImode);
12961 if (GET_CODE (align_rtx) == CONST_INT)
12962 align = INTVAL (align_rtx);
12964 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
12966 /* Is there a known alignment and is it less than 4? */
12969 rtx scratch1 = gen_reg_rtx (Pmode);
12970 emit_move_insn (scratch1, out);
12971 /* Is there a known alignment and is it not 2? */
12974 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
12975 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
12977 /* Leave just the 3 lower bits. */
12978 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
12979 NULL_RTX, 0, OPTAB_WIDEN);
12981 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12982 Pmode, 1, align_4_label);
12983 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
12984 Pmode, 1, align_2_label);
12985 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
12986 Pmode, 1, align_3_label);
12990 /* Since the alignment is 2, we have to check 2 or 0 bytes;
12991 check if is aligned to 4 - byte. */
12993 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
12994 NULL_RTX, 0, OPTAB_WIDEN);
12996 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12997 Pmode, 1, align_4_label);
13000 mem = change_address (src, QImode, out);
13002 /* Now compare the bytes. */
13004 /* Compare the first n unaligned byte on a byte per byte basis. */
13005 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13006 QImode, 1, end_0_label);
13008 /* Increment the address. */
13010 emit_insn (gen_adddi3 (out, out, const1_rtx))
13012 emit_insn (gen_addsi3 (out, out, const1_rtx));
13014 /* Not needed with an alignment of 2 */
13017 emit_label (align_2_label);
13019 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13023 emit_insn (gen_adddi3 (out, out, const1_rtx));
13025 emit_insn (gen_addsi3 (out, out, const1_rtx));
13027 emit_label (align_3_label);
13030 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13034 emit_insn (gen_adddi3 (out, out, const1_rtx));
13036 emit_insn (gen_addsi3 (out, out, const1_rtx));
13039 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13040 align this loop. It gives only huge programs, but does not help to
13042 emit_label (align_4_label);
13044 mem = change_address (src, SImode, out);
13045 emit_move_insn (scratch, mem);
13047 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13049 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13051 /* This formula yields a nonzero result iff one of the bytes is zero.
13052 This saves three branches inside loop and many cycles. */
/* Classic zero-byte detector: (word - 0x01010101) & ~word & 0x80808080.  */
13054 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13055 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13056 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13057 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13058 gen_int_mode (0x80808080, SImode)));
13059 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Branchless fixup variant (conditional moves) -- locate which byte of the
   hit word was zero.  */
13064 rtx reg = gen_reg_rtx (SImode);
13065 rtx reg2 = gen_reg_rtx (Pmode);
13066 emit_move_insn (reg, tmpreg);
13067 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13069 /* If zero is not in the first two bytes, move two bytes forward. */
13070 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13071 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13072 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13073 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13074 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13077 /* Emit lea manually to avoid clobbering of flags. */
13078 emit_insn (gen_rtx_SET (SImode, reg2,
13079 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13081 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13082 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13083 emit_insn (gen_rtx_SET (VOIDmode, out,
13084 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branching fixup variant (no cmov available).  */
13091 rtx end_2_label = gen_label_rtx ();
13092 /* Is zero in the first two bytes? */
13094 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13095 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13096 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13097 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13098 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13100 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13101 JUMP_LABEL (tmp) = end_2_label;
13103 /* Not in the first two. Move two bytes forward. */
13104 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13106 emit_insn (gen_adddi3 (out, out, const2_rtx));
13108 emit_insn (gen_addsi3 (out, out, const2_rtx));
13110 emit_label (end_2_label);
13114 /* Avoid branch in fixing the byte. */
/* add tmpreg,tmpreg sets carry from bit 7; sbb-style subtract of 3 then
   yields -2 or -3 adjustment without a branch.  Hard-coded reg 17 is the
   flags register here -- NOTE(review): FLAGS_REG would be clearer.  */
13115 tmpreg = gen_lowpart (QImode, tmpreg);
13116 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13117 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13119 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13121 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13123 emit_label (end_0_label);
/* Emit a call (or sibcall) insn.  RETVAL is the value-return rtx or NULL,
   FNADDR the MEM holding the callee address, CALLARG1 the bytes-popped/args
   rtx used by the CALL pattern, CALLARG2 the AL register value for x86-64
   varargs (SSE register count), POP the callee-pop amount, SIBCALL nonzero
   for a tail call.
   NOTE(review): upstream line numbers skip here (e.g. the #if TARGET_MACHO
   opener for line 13153's #endif is missing); verify against upstream
   i386.c.  */
13127 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13128 rtx callarg2 ATTRIBUTE_UNUSED,
13129 rtx pop, int sibcall)
13131 rtx use = NULL, call;
13133 if (pop == const0_rtx)
13135 gcc_assert (!TARGET_64BIT || !pop);
13138 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13139 fnaddr = machopic_indirect_call_target (fnaddr);
13141 /* Static functions and indirect calls don't need the pic register. */
13142 if (! TARGET_64BIT && flag_pic
13143 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13144 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13145 use_reg (&use, pic_offset_table_rtx)
13147 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
/* x86-64 varargs ABI: AL carries the number of SSE registers used.  */
13149 rtx al = gen_rtx_REG (QImode, 0);
13150 emit_move_insn (al, callarg2);
13151 use_reg (&use, al);
13153 #endif /* TARGET_MACHO */
13155 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13157 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13158 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls must go through R11: it is the only call-clobbered
   register not used for argument passing.  */
13160 if (sibcall && TARGET_64BIT
13161 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13164 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13165 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13166 emit_move_insn (fnaddr, addr);
13167 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13170 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13172 call = gen_rtx_SET (VOIDmode, retval, call);
/* Callee-pop: wrap the call and the stack-pointer adjustment in one
   PARALLEL.  */
13175 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13176 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13177 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13180 call = emit_call_insn (call);
13182 CALL_INSN_FUNCTION_USAGE (call) = use;
13186 /* Clear stack slot assignments remembered from previous functions.
13187 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates and zero-initializes the per-function machine_function record.
   NOTE(review): the "return f;" line is missing from this extract
   (upstream line numbers skip after 13197).  */
13190 static struct machine_function *
13191 ix86_init_machine_status (void)
13193 struct machine_function *f;
13195 f = ggc_alloc_cleared (sizeof (struct machine_function));
13196 f->use_fast_prologue_epilogue_nregs = -1;
13197 f->tls_descriptor_call_expanded_p = 0;
13202 /* Return a MEM corresponding to a stack slot with mode MODE.
13203 Allocate a new slot if necessary.
13205 The RTL for a function can have several slots available: N is
13206 which slot to use. */
/* Slots are cached per (mode, n) on the GC-allocated ix86_stack_locals
   list; a matching entry is reused, otherwise a fresh stack local is
   allocated and pushed on the list.  NOTE(review): the "return s->rtl;"
   lines are missing from this fragmentary extract.  */
13209 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13211 struct stack_local_entry *s;
13213 gcc_assert (n < MAX_386_STACK_LOCALS);
13215 for (s = ix86_stack_locals; s; s = s->next)
13216 if (s->mode == mode && s->n == n)
13219 s = (struct stack_local_entry *)
13220 ggc_alloc (sizeof (struct stack_local_entry));
13223 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13225 s->next = ix86_stack_locals;
13226 ix86_stack_locals = s;
13230 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Lazily builds and caches (GC-rooted) the SYMBOL_REF.  The GNU-TLS
   spelling has a leading underscore ("___tls_get_addr"), the Sun/standard
   spelling does not.  NOTE(review): a line is missing between 13240 and
   13242 (likely the rest of the TARGET_ANY_GNU_TLS condition), which is
   why the parentheses look unbalanced in this extract.  */
13232 static GTY(()) rtx ix86_tls_symbol;
13234 ix86_tls_get_addr (void)
13237 if (!ix86_tls_symbol)
13239 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13240 (TARGET_ANY_GNU_TLS
13242 ? "___tls_get_addr"
13243 : "__tls_get_addr");
13246 return ix86_tls_symbol;
13249 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
/* Lazily builds and caches (GC-rooted) the symbol, marking it as a
   global-dynamic TLS symbol via SYMBOL_REF_FLAGS.  */
13251 static GTY(()) rtx ix86_tls_module_base_symbol;
13253 ix86_tls_module_base (void)
13256 if (!ix86_tls_module_base_symbol)
13258 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13259 "_TLS_MODULE_BASE_");
13260 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13261 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13264 return ix86_tls_module_base_symbol;
13267 /* Calculate the length of the memory address in the instruction
13268 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* NOTE(review): upstream line numbers skip here; the "len" accumulator
   declaration, several returns and the final return are missing from this
   view.  The visible logic decomposes ADDR and sizes the displacement/SIB
   bytes per the IA-32 ModRM/SIB encoding rules.  */
13271 memory_address_length (rtx addr)
13273 struct ix86_address parts;
13274 rtx base, index, disp;
13278 if (GET_CODE (addr) == PRE_DEC
13279 || GET_CODE (addr) == POST_INC
13280 || GET_CODE (addr) == PRE_MODIFY
13281 || GET_CODE (addr) == POST_MODIFY)
13284 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so register identity checks below see the hard regs.  */
13287 if (parts.base && GET_CODE (parts.base) == SUBREG)
13288 parts.base = SUBREG_REG (parts.base);
13289 if (parts.index && GET_CODE (parts.index) == SUBREG)
13290 parts.index = SUBREG_REG (parts.index);
13293 index = parts.index;
13298 - esp as the base always wants an index,
13299 - ebp as the base always wants a displacement. */
13301 /* Register Indirect. */
13302 if (base && !index && !disp)
13304 /* esp (for its index) and ebp (for its displacement) need
13305 the two-byte modrm form. */
13306 if (addr == stack_pointer_rtx
13307 || addr == arg_pointer_rtx
13308 || addr == frame_pointer_rtx
13309 || addr == hard_frame_pointer_rtx)
13313 /* Direct Addressing. */
13314 else if (disp && !base && !index)
13319 /* Find the length of the displacement constant. */
/* With a base, a disp fitting in 8 bits (constraint K) needs 1 byte,
   otherwise 4.  */
13322 if (base && satisfies_constraint_K (disp))
13327 /* ebp always wants a displacement. */
13328 else if (base == hard_frame_pointer_rtx)
13331 /* An index requires the two-byte modrm form.... */
13333 /* ...like esp, which always wants an index. */
13334 || base == stack_pointer_rtx
13335 || base == arg_pointer_rtx
13336 || base == frame_pointer_rtx)
13343 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13344 is set, expect that insn have 8bit immediate alternative. */
/* Returns the byte length of INSN's immediate operand: 1 when SHORTFORM
   and the constant fits constraint K (signed 8-bit), otherwise sized by
   the insn's mode.  NOTE(review): the switch body and returns are largely
   missing from this fragmentary extract.  */
13346 ix86_attr_length_immediate_default (rtx insn, int shortform)
13350 extract_insn_cached (insn);
13351 for (i = recog_data.n_operands - 1; i >= 0; --i)
13352 if (CONSTANT_P (recog_data.operand[i]))
13355 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13359 switch (get_attr_mode (insn))
13370 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13375 fatal_insn ("unknown insn mode", insn);
13381 /* Compute default value for "length_address" attribute. */
/* For a LEA the address is the SET_SRC of its pattern; otherwise the first
   MEM operand found is measured.  Returns 0 when no MEM operand exists
   (the final return is among the lines missing from this extract).  */
13383 ix86_attr_length_address_default (rtx insn)
13387 if (get_attr_type (insn) == TYPE_LEA)
13389 rtx set = PATTERN (insn);
13391 if (GET_CODE (set) == PARALLEL)
13392 set = XVECEXP (set, 0, 0);
13394 gcc_assert (GET_CODE (set) == SET);
13396 return memory_address_length (SET_SRC (set));
13399 extract_insn_cached (insn);
13400 for (i = recog_data.n_operands - 1; i >= 0; --i)
13401 if (GET_CODE (recog_data.operand[i]) == MEM)
13403 return memory_address_length (XEXP (recog_data.operand[i], 0));
13409 /* Return the maximum number of instructions a cpu can issue. */
/* Scheduler hook (TARGET_SCHED_ISSUE_RATE).  Switches on ix86_tune; the
   per-case return values and the default are among the lines missing from
   this fragmentary extract.  */
13412 ix86_issue_rate (void)
13416 case PROCESSOR_PENTIUM:
13420 case PROCESSOR_PENTIUMPRO:
13421 case PROCESSOR_PENTIUM4:
13422 case PROCESSOR_ATHLON:
13424 case PROCESSOR_NOCONA:
13425 case PROCESSOR_GENERIC32:
13426 case PROCESSOR_GENERIC64:
13434 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13435 by DEP_INSN and nothing set by DEP_INSN. */
13438 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13442 /* Simplify the test for uninteresting insns. */
13443 if (insn_type != TYPE_SETCC
13444 && insn_type != TYPE_ICMOV
13445 && insn_type != TYPE_FCMOV
13446 && insn_type != TYPE_IBR)
13449 if ((set = single_set (dep_insn)) != 0)
13451 set = SET_DEST (set);
13454 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13455 && XVECLEN (PATTERN (dep_insn), 0) == 2
13456 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13457 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13459 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13460 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13465 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13468 /* This test is true if the dependent insn reads the flags but
13469 not any other potentially set register. */
13470 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13473 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13479 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13480 address with operands set by DEP_INSN. */
/* For LEA the "address" is the SET_SRC of the pattern; for other insns the
   first MEM operand's address is used.  Returns whether DEP_INSN modifies
   anything mentioned in that address (modified_in_p).  NOTE(review):
   upstream line numbers skip here -- parts of the condition at 13487 and
   an early-return path are missing from this extract.  */
13483 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13487 if (insn_type == TYPE_LEA
13490 addr = PATTERN (insn);
13492 if (GET_CODE (addr) == PARALLEL)
13493 addr = XVECEXP (addr, 0, 0);
13495 gcc_assert (GET_CODE (addr) == SET);
13497 addr = SET_SRC (addr);
13502 extract_insn_cached (insn);
13503 for (i = recog_data.n_operands - 1; i >= 0; --i)
13504 if (GET_CODE (recog_data.operand[i]) == MEM)
13506 addr = XEXP (recog_data.operand[i], 0);
13513 return modified_in_p (addr, dep_insn);
/* Scheduler hook (TARGET_SCHED_ADJUST_COST): adjust COST, the latency of
   the dependence LINK from DEP_INSN to INSN, according to ix86_tune.
   NOTE(review): upstream line numbers skip throughout -- cost assignments,
   breaks and the final return are missing from this extract; verify
   against upstream i386.c.  */
13517 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13519 enum attr_type insn_type, dep_insn_type;
13520 enum attr_memory memory;
13522 int dep_insn_code_number;
13524 /* Anti and output dependencies have zero cost on all CPUs. */
13525 if (REG_NOTE_KIND (link) != 0)
13528 dep_insn_code_number = recog_memoized (dep_insn);
13530 /* If we can't recognize the insns, we can't really do anything. */
13531 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13534 insn_type = get_attr_type (insn);
13535 dep_insn_type = get_attr_type (dep_insn);
/* switch on ix86_tune (switch head among the missing lines).  */
13539 case PROCESSOR_PENTIUM:
13540 /* Address Generation Interlock adds a cycle of latency. */
13541 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13544 /* ??? Compares pair with jump/setcc. */
13545 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13548 /* Floating point stores require value to be ready one cycle earlier. */
13549 if (insn_type == TYPE_FMOV
13550 && get_attr_memory (insn) == MEMORY_STORE
13551 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13555 case PROCESSOR_PENTIUMPRO:
13556 memory = get_attr_memory (insn);
13558 /* INT->FP conversion is expensive. */
13559 if (get_attr_fp_int_src (dep_insn))
13562 /* There is one cycle extra latency between an FP op and a store. */
13563 if (insn_type == TYPE_FMOV
13564 && (set = single_set (dep_insn)) != NULL_RTX
13565 && (set2 = single_set (insn)) != NULL_RTX
13566 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13567 && GET_CODE (SET_DEST (set2)) == MEM)
13570 /* Show ability of reorder buffer to hide latency of load by executing
13571 in parallel with previous instruction in case
13572 previous instruction is not needed to compute the address. */
13573 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13574 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13576 /* Claim moves to take one cycle, as core can issue one load
13577 at time and the next load can start cycle later. */
13578 if (dep_insn_type == TYPE_IMOV
13579 || dep_insn_type == TYPE_FMOV)
/* presumably the PROCESSOR_K6 case -- its label is among the missing
   lines.  */
13587 memory = get_attr_memory (insn);
13589 /* The esp dependency is resolved before the instruction is really
13591 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13592 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13595 /* INT->FP conversion is expensive. */
13596 if (get_attr_fp_int_src (dep_insn))
13599 /* Show ability of reorder buffer to hide latency of load by executing
13600 in parallel with previous instruction in case
13601 previous instruction is not needed to compute the address. */
13602 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13603 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13605 /* Claim moves to take one cycle, as core can issue one load
13606 at time and the next load can start cycle later. */
13607 if (dep_insn_type == TYPE_IMOV
13608 || dep_insn_type == TYPE_FMOV)
13617 case PROCESSOR_ATHLON:
13619 case PROCESSOR_GENERIC32:
13620 case PROCESSOR_GENERIC64:
13621 memory = get_attr_memory (insn);
13623 /* Show ability of reorder buffer to hide latency of load by executing
13624 in parallel with previous instruction in case
13625 previous instruction is not needed to compute the address. */
13626 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13627 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13629 enum attr_unit unit = get_attr_unit (insn);
13632 /* Because of the difference between the length of integer and
13633 floating unit pipeline preparation stages, the memory operands
13634 for floating point are cheaper.
13636 ??? For Athlon it the difference is most probably 2. */
13637 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13640 loadcost = TARGET_ATHLON ? 2 : 0;
13642 if (cost >= loadcost)
13655 /* How many alternative schedules to try. This should be as wide as the
13656 scheduling freedom in the DFA, but no wider. Making this value too
13657 large results extra work for the scheduler. */
/* Scheduler hook: per-CPU lookahead depth; the returned constants and the
   default return are among the lines missing from this extract.  */
13660 ia32_multipass_dfa_lookahead (void)
13662 if (ix86_tune == PROCESSOR_PENTIUM)
13665 if (ix86_tune == PROCESSOR_PENTIUMPRO
13666 || ix86_tune == PROCESSOR_K6)
13674 /* Compute the alignment given to a constant that is being placed in memory.
13675 EXP is the constant and ALIGN is the alignment that the object would
13677 The value of this function is used instead of that alignment to align
/* DFmode reals get 64-bit alignment, 128-bit-preferring modes get 128;
   long string constants get word alignment unless optimizing for size.
   The fall-through "return align;" is among the missing lines.  */
13681 ix86_constant_alignment (tree exp, int align)
13683 if (TREE_CODE (exp) == REAL_CST)
13685 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13687 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13690 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13691 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13692 return BITS_PER_WORD;
13697 /* Compute the alignment for a static variable.
13698 TYPE is the data type, and ALIGN is the alignment that
13699 the object would ordinarily have. The value of this function is used
13700 instead of that alignment to align the object. */
/* NOTE(review): several return statements are among the lines missing
   from this extract; the visible structure raises alignment for large
   aggregates, DFmode/128-bit element arrays, complex and scalar FP/vector
   types per the i386/x86-64 ABI preferences.  */
13703 ix86_data_alignment (tree type, int align)
13705 int max_align = optimize_size ? BITS_PER_WORD : 256;
13707 if (AGGREGATE_TYPE_P (type)
13708 && TYPE_SIZE (type)
13709 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13710 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13711 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13712 && align < max_align)
13715 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13716 to 16byte boundary. */
13719 if (AGGREGATE_TYPE_P (type)
13720 && TYPE_SIZE (type)
13721 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13722 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13723 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13727 if (TREE_CODE (type) == ARRAY_TYPE)
13729 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13731 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13734 else if (TREE_CODE (type) == COMPLEX_TYPE)
13737 if (TYPE_MODE (type) == DCmode && align < 64)
13739 if (TYPE_MODE (type) == XCmode && align < 128)
13742 else if ((TREE_CODE (type) == RECORD_TYPE
13743 || TREE_CODE (type) == UNION_TYPE
13744 || TREE_CODE (type) == QUAL_UNION_TYPE)
13745 && TYPE_FIELDS (type))
13747 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13749 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13752 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13753 || TREE_CODE (type) == INTEGER_TYPE)
13755 if (TYPE_MODE (type) == DFmode && align < 64)
13757 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13764 /* Compute the alignment for a local variable.
13765 TYPE is the data type, and ALIGN is the alignment (in bits) that
13766 the object would ordinarily have.  The value of this macro is used
13767 instead of that alignment to align the object.  */
13770 ix86_local_alignment (tree type, int align)
13772 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13773 to a 16-byte boundary.  */
/* Note the size test here is >= 16 *bits*, unlike the >= 128 used in
   ix86_data_alignment — NOTE(review): possibly intentional for locals,
   confirm against the full source.  */
13776 if (AGGREGATE_TYPE_P (type)
13777 && TYPE_SIZE (type)
13778 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13779 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13780 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type boosts, mirroring ix86_data_alignment: arrays of doubles to
   64 bits, arrays of 128-bit-mode elements to 128 bits (returns elided
   in this excerpt).  */
13783 if (TREE_CODE (type) == ARRAY_TYPE)
13785 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13787 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
/* Complex doubles and complex long doubles.  */
13790 else if (TREE_CODE (type) == COMPLEX_TYPE)
13792 if (TYPE_MODE (type) == DCmode && align < 64)
13794 if (TYPE_MODE (type) == XCmode && align < 128)
/* Structs/unions: key off the mode of the first field.  */
13797 else if ((TREE_CODE (type) == RECORD_TYPE
13798 || TREE_CODE (type) == UNION_TYPE
13799 || TREE_CODE (type) == QUAL_UNION_TYPE)
13800 && TYPE_FIELDS (type))
13802 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13804 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
/* Scalars and vectors.  */
13807 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13808 || TREE_CODE (type) == INTEGER_TYPE)
13811 if (TYPE_MODE (type) == DFmode && align < 64)
13813 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13819 /* Emit RTL insns to initialize the variable parts of a trampoline.
13820 FNADDR is an RTX for the address of the function's pure code.
13821 CXT is an RTX for the static chain value for the function.
The trampoline stores CXT into the static-chain register and jumps to
FNADDR; the byte constants written below are x86 machine code.  */
13823 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
13827 /* Compute offset from the end of the jmp to the target function
(the jmp's rel32 is relative to the next instruction, at tramp+10).  */
13828 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
13829 plus_constant (tramp, 10),
13830 NULL_RTX, 1, OPTAB_DIRECT);
/* Opcode 0xb9 = movl $imm32, %ecx: load the static chain.  */
13831 emit_move_insn (gen_rtx_MEM (QImode, tramp),
13832 gen_int_mode (0xb9, QImode));
13833 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
/* Opcode 0xe9 = jmp rel32: tail-jump to the target function.  */
13834 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
13835 gen_int_mode (0xe9, QImode));
13836 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
13841 /* Try to load address using shorter movl instead of movabs.
13842 We may want to support movq for kernel mode, but kernel does not use
13843 trampolines at the moment.  */
13844 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
13846 fnaddr = copy_to_mode_reg (DImode, fnaddr);
/* Bytes 41 bb (stored little-endian as HImode 0xbb41) =
movl $imm32, %r11d.  */
13847 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13848 gen_int_mode (0xbb41, HImode));
13849 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
13850 gen_lowpart (SImode, fnaddr));
/* Bytes 49 bb = movabs $imm64, %r11 (full 64-bit address).  */
13855 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13856 gen_int_mode (0xbb49, HImode));
13857 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13861 /* Load static chain using movabs to r10 (bytes 49 ba).  */
13862 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13863 gen_int_mode (0xba49, HImode));
13864 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13867 /* Jump to the r11 (bytes 49 ff e3 = jmp *%r11).  */
13868 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13869 gen_int_mode (0xff49, HImode));
13870 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
13871 gen_int_mode (0xe3, QImode));
/* Everything emitted must fit inside the space TRAMPOLINE_SIZE reserves.  */
13873 gcc_assert (offset <= TRAMPOLINE_SIZE);
13876 #ifdef ENABLE_EXECUTE_STACK
/* On targets with non-executable stacks, ask libgcc to make the
trampoline's page executable.  */
13877 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
13878 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
13882 /* Codes for all the SSE/MMX builtins.  */
/* Members of enum ix86_builtins: one enumerator per target builtin,
   grouped by the instruction-set extension that provides it.  The
   enum's opening and closing lines are elided in this excerpt.  */
/* SSE (plus the MMX builtins registered alongside it).  */
13885 IX86_BUILTIN_ADDPS,
13886 IX86_BUILTIN_ADDSS,
13887 IX86_BUILTIN_DIVPS,
13888 IX86_BUILTIN_DIVSS,
13889 IX86_BUILTIN_MULPS,
13890 IX86_BUILTIN_MULSS,
13891 IX86_BUILTIN_SUBPS,
13892 IX86_BUILTIN_SUBSS,
13894 IX86_BUILTIN_CMPEQPS,
13895 IX86_BUILTIN_CMPLTPS,
13896 IX86_BUILTIN_CMPLEPS,
13897 IX86_BUILTIN_CMPGTPS,
13898 IX86_BUILTIN_CMPGEPS,
13899 IX86_BUILTIN_CMPNEQPS,
13900 IX86_BUILTIN_CMPNLTPS,
13901 IX86_BUILTIN_CMPNLEPS,
13902 IX86_BUILTIN_CMPNGTPS,
13903 IX86_BUILTIN_CMPNGEPS,
13904 IX86_BUILTIN_CMPORDPS,
13905 IX86_BUILTIN_CMPUNORDPS,
13906 IX86_BUILTIN_CMPEQSS,
13907 IX86_BUILTIN_CMPLTSS,
13908 IX86_BUILTIN_CMPLESS,
13909 IX86_BUILTIN_CMPNEQSS,
13910 IX86_BUILTIN_CMPNLTSS,
13911 IX86_BUILTIN_CMPNLESS,
13912 IX86_BUILTIN_CMPNGTSS,
13913 IX86_BUILTIN_CMPNGESS,
13914 IX86_BUILTIN_CMPORDSS,
13915 IX86_BUILTIN_CMPUNORDSS,
13917 IX86_BUILTIN_COMIEQSS,
13918 IX86_BUILTIN_COMILTSS,
13919 IX86_BUILTIN_COMILESS,
13920 IX86_BUILTIN_COMIGTSS,
13921 IX86_BUILTIN_COMIGESS,
13922 IX86_BUILTIN_COMINEQSS,
13923 IX86_BUILTIN_UCOMIEQSS,
13924 IX86_BUILTIN_UCOMILTSS,
13925 IX86_BUILTIN_UCOMILESS,
13926 IX86_BUILTIN_UCOMIGTSS,
13927 IX86_BUILTIN_UCOMIGESS,
13928 IX86_BUILTIN_UCOMINEQSS,
13930 IX86_BUILTIN_CVTPI2PS,
13931 IX86_BUILTIN_CVTPS2PI,
13932 IX86_BUILTIN_CVTSI2SS,
13933 IX86_BUILTIN_CVTSI642SS,
13934 IX86_BUILTIN_CVTSS2SI,
13935 IX86_BUILTIN_CVTSS2SI64,
13936 IX86_BUILTIN_CVTTPS2PI,
13937 IX86_BUILTIN_CVTTSS2SI,
13938 IX86_BUILTIN_CVTTSS2SI64,
13940 IX86_BUILTIN_MAXPS,
13941 IX86_BUILTIN_MAXSS,
13942 IX86_BUILTIN_MINPS,
13943 IX86_BUILTIN_MINSS,
13945 IX86_BUILTIN_LOADUPS,
13946 IX86_BUILTIN_STOREUPS,
13947 IX86_BUILTIN_MOVSS,
13949 IX86_BUILTIN_MOVHLPS,
13950 IX86_BUILTIN_MOVLHPS,
13951 IX86_BUILTIN_LOADHPS,
13952 IX86_BUILTIN_LOADLPS,
13953 IX86_BUILTIN_STOREHPS,
13954 IX86_BUILTIN_STORELPS,
13956 IX86_BUILTIN_MASKMOVQ,
13957 IX86_BUILTIN_MOVMSKPS,
13958 IX86_BUILTIN_PMOVMSKB,
13960 IX86_BUILTIN_MOVNTPS,
13961 IX86_BUILTIN_MOVNTQ,
13963 IX86_BUILTIN_LOADDQU,
13964 IX86_BUILTIN_STOREDQU,
13966 IX86_BUILTIN_PACKSSWB,
13967 IX86_BUILTIN_PACKSSDW,
13968 IX86_BUILTIN_PACKUSWB,
13970 IX86_BUILTIN_PADDB,
13971 IX86_BUILTIN_PADDW,
13972 IX86_BUILTIN_PADDD,
13973 IX86_BUILTIN_PADDQ,
13974 IX86_BUILTIN_PADDSB,
13975 IX86_BUILTIN_PADDSW,
13976 IX86_BUILTIN_PADDUSB,
13977 IX86_BUILTIN_PADDUSW,
13978 IX86_BUILTIN_PSUBB,
13979 IX86_BUILTIN_PSUBW,
13980 IX86_BUILTIN_PSUBD,
13981 IX86_BUILTIN_PSUBQ,
13982 IX86_BUILTIN_PSUBSB,
13983 IX86_BUILTIN_PSUBSW,
13984 IX86_BUILTIN_PSUBUSB,
13985 IX86_BUILTIN_PSUBUSW,
13988 IX86_BUILTIN_PANDN,
13992 IX86_BUILTIN_PAVGB,
13993 IX86_BUILTIN_PAVGW,
13995 IX86_BUILTIN_PCMPEQB,
13996 IX86_BUILTIN_PCMPEQW,
13997 IX86_BUILTIN_PCMPEQD,
13998 IX86_BUILTIN_PCMPGTB,
13999 IX86_BUILTIN_PCMPGTW,
14000 IX86_BUILTIN_PCMPGTD,
14002 IX86_BUILTIN_PMADDWD,
14004 IX86_BUILTIN_PMAXSW,
14005 IX86_BUILTIN_PMAXUB,
14006 IX86_BUILTIN_PMINSW,
14007 IX86_BUILTIN_PMINUB,
14009 IX86_BUILTIN_PMULHUW,
14010 IX86_BUILTIN_PMULHW,
14011 IX86_BUILTIN_PMULLW,
14013 IX86_BUILTIN_PSADBW,
14014 IX86_BUILTIN_PSHUFW,
14016 IX86_BUILTIN_PSLLW,
14017 IX86_BUILTIN_PSLLD,
14018 IX86_BUILTIN_PSLLQ,
14019 IX86_BUILTIN_PSRAW,
14020 IX86_BUILTIN_PSRAD,
14021 IX86_BUILTIN_PSRLW,
14022 IX86_BUILTIN_PSRLD,
14023 IX86_BUILTIN_PSRLQ,
14024 IX86_BUILTIN_PSLLWI,
14025 IX86_BUILTIN_PSLLDI,
14026 IX86_BUILTIN_PSLLQI,
14027 IX86_BUILTIN_PSRAWI,
14028 IX86_BUILTIN_PSRADI,
14029 IX86_BUILTIN_PSRLWI,
14030 IX86_BUILTIN_PSRLDI,
14031 IX86_BUILTIN_PSRLQI,
14033 IX86_BUILTIN_PUNPCKHBW,
14034 IX86_BUILTIN_PUNPCKHWD,
14035 IX86_BUILTIN_PUNPCKHDQ,
14036 IX86_BUILTIN_PUNPCKLBW,
14037 IX86_BUILTIN_PUNPCKLWD,
14038 IX86_BUILTIN_PUNPCKLDQ,
14040 IX86_BUILTIN_SHUFPS,
14042 IX86_BUILTIN_RCPPS,
14043 IX86_BUILTIN_RCPSS,
14044 IX86_BUILTIN_RSQRTPS,
14045 IX86_BUILTIN_RSQRTSS,
14046 IX86_BUILTIN_SQRTPS,
14047 IX86_BUILTIN_SQRTSS,
14049 IX86_BUILTIN_UNPCKHPS,
14050 IX86_BUILTIN_UNPCKLPS,
14052 IX86_BUILTIN_ANDPS,
14053 IX86_BUILTIN_ANDNPS,
14055 IX86_BUILTIN_XORPS,
14058 IX86_BUILTIN_LDMXCSR,
14059 IX86_BUILTIN_STMXCSR,
14060 IX86_BUILTIN_SFENCE,
14062 /* 3DNow! Original */
14063 IX86_BUILTIN_FEMMS,
14064 IX86_BUILTIN_PAVGUSB,
14065 IX86_BUILTIN_PF2ID,
14066 IX86_BUILTIN_PFACC,
14067 IX86_BUILTIN_PFADD,
14068 IX86_BUILTIN_PFCMPEQ,
14069 IX86_BUILTIN_PFCMPGE,
14070 IX86_BUILTIN_PFCMPGT,
14071 IX86_BUILTIN_PFMAX,
14072 IX86_BUILTIN_PFMIN,
14073 IX86_BUILTIN_PFMUL,
14074 IX86_BUILTIN_PFRCP,
14075 IX86_BUILTIN_PFRCPIT1,
14076 IX86_BUILTIN_PFRCPIT2,
14077 IX86_BUILTIN_PFRSQIT1,
14078 IX86_BUILTIN_PFRSQRT,
14079 IX86_BUILTIN_PFSUB,
14080 IX86_BUILTIN_PFSUBR,
14081 IX86_BUILTIN_PI2FD,
14082 IX86_BUILTIN_PMULHRW,
14084 /* 3DNow! Athlon Extensions */
14085 IX86_BUILTIN_PF2IW,
14086 IX86_BUILTIN_PFNACC,
14087 IX86_BUILTIN_PFPNACC,
14088 IX86_BUILTIN_PI2FW,
14089 IX86_BUILTIN_PSWAPDSI,
14090 IX86_BUILTIN_PSWAPDSF,
/* SSE2 */
14093 IX86_BUILTIN_ADDPD,
14094 IX86_BUILTIN_ADDSD,
14095 IX86_BUILTIN_DIVPD,
14096 IX86_BUILTIN_DIVSD,
14097 IX86_BUILTIN_MULPD,
14098 IX86_BUILTIN_MULSD,
14099 IX86_BUILTIN_SUBPD,
14100 IX86_BUILTIN_SUBSD,
14102 IX86_BUILTIN_CMPEQPD,
14103 IX86_BUILTIN_CMPLTPD,
14104 IX86_BUILTIN_CMPLEPD,
14105 IX86_BUILTIN_CMPGTPD,
14106 IX86_BUILTIN_CMPGEPD,
14107 IX86_BUILTIN_CMPNEQPD,
14108 IX86_BUILTIN_CMPNLTPD,
14109 IX86_BUILTIN_CMPNLEPD,
14110 IX86_BUILTIN_CMPNGTPD,
14111 IX86_BUILTIN_CMPNGEPD,
14112 IX86_BUILTIN_CMPORDPD,
14113 IX86_BUILTIN_CMPUNORDPD,
14114 IX86_BUILTIN_CMPNEPD,
14115 IX86_BUILTIN_CMPEQSD,
14116 IX86_BUILTIN_CMPLTSD,
14117 IX86_BUILTIN_CMPLESD,
14118 IX86_BUILTIN_CMPNEQSD,
14119 IX86_BUILTIN_CMPNLTSD,
14120 IX86_BUILTIN_CMPNLESD,
14121 IX86_BUILTIN_CMPORDSD,
14122 IX86_BUILTIN_CMPUNORDSD,
14123 IX86_BUILTIN_CMPNESD,
14125 IX86_BUILTIN_COMIEQSD,
14126 IX86_BUILTIN_COMILTSD,
14127 IX86_BUILTIN_COMILESD,
14128 IX86_BUILTIN_COMIGTSD,
14129 IX86_BUILTIN_COMIGESD,
14130 IX86_BUILTIN_COMINEQSD,
14131 IX86_BUILTIN_UCOMIEQSD,
14132 IX86_BUILTIN_UCOMILTSD,
14133 IX86_BUILTIN_UCOMILESD,
14134 IX86_BUILTIN_UCOMIGTSD,
14135 IX86_BUILTIN_UCOMIGESD,
14136 IX86_BUILTIN_UCOMINEQSD,
14138 IX86_BUILTIN_MAXPD,
14139 IX86_BUILTIN_MAXSD,
14140 IX86_BUILTIN_MINPD,
14141 IX86_BUILTIN_MINSD,
14143 IX86_BUILTIN_ANDPD,
14144 IX86_BUILTIN_ANDNPD,
14146 IX86_BUILTIN_XORPD,
14148 IX86_BUILTIN_SQRTPD,
14149 IX86_BUILTIN_SQRTSD,
14151 IX86_BUILTIN_UNPCKHPD,
14152 IX86_BUILTIN_UNPCKLPD,
14154 IX86_BUILTIN_SHUFPD,
14156 IX86_BUILTIN_LOADUPD,
14157 IX86_BUILTIN_STOREUPD,
14158 IX86_BUILTIN_MOVSD,
14160 IX86_BUILTIN_LOADHPD,
14161 IX86_BUILTIN_LOADLPD,
14163 IX86_BUILTIN_CVTDQ2PD,
14164 IX86_BUILTIN_CVTDQ2PS,
14166 IX86_BUILTIN_CVTPD2DQ,
14167 IX86_BUILTIN_CVTPD2PI,
14168 IX86_BUILTIN_CVTPD2PS,
14169 IX86_BUILTIN_CVTTPD2DQ,
14170 IX86_BUILTIN_CVTTPD2PI,
14172 IX86_BUILTIN_CVTPI2PD,
14173 IX86_BUILTIN_CVTSI2SD,
14174 IX86_BUILTIN_CVTSI642SD,
14176 IX86_BUILTIN_CVTSD2SI,
14177 IX86_BUILTIN_CVTSD2SI64,
14178 IX86_BUILTIN_CVTSD2SS,
14179 IX86_BUILTIN_CVTSS2SD,
14180 IX86_BUILTIN_CVTTSD2SI,
14181 IX86_BUILTIN_CVTTSD2SI64,
14183 IX86_BUILTIN_CVTPS2DQ,
14184 IX86_BUILTIN_CVTPS2PD,
14185 IX86_BUILTIN_CVTTPS2DQ,
14187 IX86_BUILTIN_MOVNTI,
14188 IX86_BUILTIN_MOVNTPD,
14189 IX86_BUILTIN_MOVNTDQ,
14192 IX86_BUILTIN_MASKMOVDQU,
14193 IX86_BUILTIN_MOVMSKPD,
14194 IX86_BUILTIN_PMOVMSKB128,
14196 IX86_BUILTIN_PACKSSWB128,
14197 IX86_BUILTIN_PACKSSDW128,
14198 IX86_BUILTIN_PACKUSWB128,
14200 IX86_BUILTIN_PADDB128,
14201 IX86_BUILTIN_PADDW128,
14202 IX86_BUILTIN_PADDD128,
14203 IX86_BUILTIN_PADDQ128,
14204 IX86_BUILTIN_PADDSB128,
14205 IX86_BUILTIN_PADDSW128,
14206 IX86_BUILTIN_PADDUSB128,
14207 IX86_BUILTIN_PADDUSW128,
14208 IX86_BUILTIN_PSUBB128,
14209 IX86_BUILTIN_PSUBW128,
14210 IX86_BUILTIN_PSUBD128,
14211 IX86_BUILTIN_PSUBQ128,
14212 IX86_BUILTIN_PSUBSB128,
14213 IX86_BUILTIN_PSUBSW128,
14214 IX86_BUILTIN_PSUBUSB128,
14215 IX86_BUILTIN_PSUBUSW128,
14217 IX86_BUILTIN_PAND128,
14218 IX86_BUILTIN_PANDN128,
14219 IX86_BUILTIN_POR128,
14220 IX86_BUILTIN_PXOR128,
14222 IX86_BUILTIN_PAVGB128,
14223 IX86_BUILTIN_PAVGW128,
14225 IX86_BUILTIN_PCMPEQB128,
14226 IX86_BUILTIN_PCMPEQW128,
14227 IX86_BUILTIN_PCMPEQD128,
14228 IX86_BUILTIN_PCMPGTB128,
14229 IX86_BUILTIN_PCMPGTW128,
14230 IX86_BUILTIN_PCMPGTD128,
14232 IX86_BUILTIN_PMADDWD128,
14234 IX86_BUILTIN_PMAXSW128,
14235 IX86_BUILTIN_PMAXUB128,
14236 IX86_BUILTIN_PMINSW128,
14237 IX86_BUILTIN_PMINUB128,
14239 IX86_BUILTIN_PMULUDQ,
14240 IX86_BUILTIN_PMULUDQ128,
14241 IX86_BUILTIN_PMULHUW128,
14242 IX86_BUILTIN_PMULHW128,
14243 IX86_BUILTIN_PMULLW128,
14245 IX86_BUILTIN_PSADBW128,
14246 IX86_BUILTIN_PSHUFHW,
14247 IX86_BUILTIN_PSHUFLW,
14248 IX86_BUILTIN_PSHUFD,
14250 IX86_BUILTIN_PSLLW128,
14251 IX86_BUILTIN_PSLLD128,
14252 IX86_BUILTIN_PSLLQ128,
14253 IX86_BUILTIN_PSRAW128,
14254 IX86_BUILTIN_PSRAD128,
14255 IX86_BUILTIN_PSRLW128,
14256 IX86_BUILTIN_PSRLD128,
14257 IX86_BUILTIN_PSRLQ128,
14258 IX86_BUILTIN_PSLLDQI128,
14259 IX86_BUILTIN_PSLLWI128,
14260 IX86_BUILTIN_PSLLDI128,
14261 IX86_BUILTIN_PSLLQI128,
14262 IX86_BUILTIN_PSRAWI128,
14263 IX86_BUILTIN_PSRADI128,
14264 IX86_BUILTIN_PSRLDQI128,
14265 IX86_BUILTIN_PSRLWI128,
14266 IX86_BUILTIN_PSRLDI128,
14267 IX86_BUILTIN_PSRLQI128,
14269 IX86_BUILTIN_PUNPCKHBW128,
14270 IX86_BUILTIN_PUNPCKHWD128,
14271 IX86_BUILTIN_PUNPCKHDQ128,
14272 IX86_BUILTIN_PUNPCKHQDQ128,
14273 IX86_BUILTIN_PUNPCKLBW128,
14274 IX86_BUILTIN_PUNPCKLWD128,
14275 IX86_BUILTIN_PUNPCKLDQ128,
14276 IX86_BUILTIN_PUNPCKLQDQ128,
14278 IX86_BUILTIN_CLFLUSH,
14279 IX86_BUILTIN_MFENCE,
14280 IX86_BUILTIN_LFENCE,
14282 /* Prescott New Instructions (SSE3).  */
14283 IX86_BUILTIN_ADDSUBPS,
14284 IX86_BUILTIN_HADDPS,
14285 IX86_BUILTIN_HSUBPS,
14286 IX86_BUILTIN_MOVSHDUP,
14287 IX86_BUILTIN_MOVSLDUP,
14288 IX86_BUILTIN_ADDSUBPD,
14289 IX86_BUILTIN_HADDPD,
14290 IX86_BUILTIN_HSUBPD,
14291 IX86_BUILTIN_LDDQU,
14293 IX86_BUILTIN_MONITOR,
14294 IX86_BUILTIN_MWAIT,
/* Generic vector init/extract/set builtins.  */
14296 IX86_BUILTIN_VEC_INIT_V2SI,
14297 IX86_BUILTIN_VEC_INIT_V4HI,
14298 IX86_BUILTIN_VEC_INIT_V8QI,
14299 IX86_BUILTIN_VEC_EXT_V2DF,
14300 IX86_BUILTIN_VEC_EXT_V2DI,
14301 IX86_BUILTIN_VEC_EXT_V4SF,
14302 IX86_BUILTIN_VEC_EXT_V4SI,
14303 IX86_BUILTIN_VEC_EXT_V8HI,
14304 IX86_BUILTIN_VEC_EXT_V2SI,
14305 IX86_BUILTIN_VEC_EXT_V4HI,
14306 IX86_BUILTIN_VEC_SET_V8HI,
14307 IX86_BUILTIN_VEC_SET_V4HI,
14309 /* SSE2 ABI functions.  */
14310 IX86_BUILTIN_SSE2_ACOS,
14311 IX86_BUILTIN_SSE2_ACOSF,
14312 IX86_BUILTIN_SSE2_ASIN,
14313 IX86_BUILTIN_SSE2_ASINF,
14314 IX86_BUILTIN_SSE2_ATAN,
14315 IX86_BUILTIN_SSE2_ATANF,
14316 IX86_BUILTIN_SSE2_ATAN2,
14317 IX86_BUILTIN_SSE2_ATAN2F,
14318 IX86_BUILTIN_SSE2_COS,
14319 IX86_BUILTIN_SSE2_COSF,
14320 IX86_BUILTIN_SSE2_EXP,
14321 IX86_BUILTIN_SSE2_EXPF,
14322 IX86_BUILTIN_SSE2_LOG10,
14323 IX86_BUILTIN_SSE2_LOG10F,
14324 IX86_BUILTIN_SSE2_LOG,
14325 IX86_BUILTIN_SSE2_LOGF,
14326 IX86_BUILTIN_SSE2_SIN,
14327 IX86_BUILTIN_SSE2_SINF,
14328 IX86_BUILTIN_SSE2_TAN,
14329 IX86_BUILTIN_SSE2_TANF,
/* Register one target builtin: declare NAME with signature TYPE and
   function code CODE, but only when all ISA bits in MASK are enabled in
   target_flags, and never declare 64-bit-only builtins in 32-bit mode.  */
14334 #define def_builtin(MASK, NAME, TYPE, CODE) \
14336 if ((MASK) & target_flags \
14337 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14338 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14339 NULL, NULL_TREE); \
14342 /* Bits for builtin_description.flag. */
14344 /* Set when we don't support the comparison natively, and should
14345 swap_comparison in order to support it. */
14346 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the builtin tables below: which ISA flags enable the
   builtin, which insn pattern expands it, and how.  */
14348 struct builtin_description
14350 const unsigned int mask; /* Required target_flags ISA bits.  */
14351 const enum insn_code icode; /* Insn pattern used to expand it.  */
14352 const char *const name; /* User-visible __builtin_ia32_* name (0 = registered elsewhere).  */
14353 const enum ix86_builtins code; /* Matching IX86_BUILTIN_* enumerator.  */
14354 const enum rtx_code comparison; /* RTX comparison code for compare builtins.  */
14355 const unsigned int flag; /* Extra bits, e.g. BUILTIN_DESC_SWAP_OPERANDS.  */
/* COMI/UCOMI scalar comparison builtins: each maps a __builtin_ia32_*
   name to the (u)comiss/(u)comisd pattern plus the RTX comparison code
   whose result the expander materializes from the flags.  */
14358 static const struct builtin_description bdesc_comi[] =
14360 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14361 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14362 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14363 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14364 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14365 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14366 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14367 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14368 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14369 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14370 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14371 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14372 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14373 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14374 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14375 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14376 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14377 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14378 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14379 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14380 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14381 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14382 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14383 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14386 static const struct builtin_description bdesc_2arg[] =
14389 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14390 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14391 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14392 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14393 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14394 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14395 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14396 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14398 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14399 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14400 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14401 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14402 BUILTIN_DESC_SWAP_OPERANDS },
14403 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14404 BUILTIN_DESC_SWAP_OPERANDS },
14405 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14406 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14407 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14408 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14409 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14410 BUILTIN_DESC_SWAP_OPERANDS },
14411 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14412 BUILTIN_DESC_SWAP_OPERANDS },
14413 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14414 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14415 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14416 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14417 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14418 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14419 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14420 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14421 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14422 BUILTIN_DESC_SWAP_OPERANDS },
14423 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14424 BUILTIN_DESC_SWAP_OPERANDS },
14425 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14427 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14428 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14429 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14430 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14432 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14433 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14434 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14435 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14437 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14438 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14439 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14440 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14441 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14444 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14445 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14446 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14447 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14448 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14449 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14450 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14451 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14453 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14454 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14455 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14456 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14457 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14458 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14459 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14460 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14462 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14463 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14464 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14466 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14467 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14468 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14469 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14471 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14472 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14474 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14475 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14476 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14477 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14478 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14479 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14481 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14482 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14483 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14484 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14486 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14487 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14488 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14489 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14490 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14491 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14494 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14495 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14496 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14498 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14499 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14500 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14502 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14503 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14504 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14505 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14506 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14507 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14509 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14510 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14511 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14512 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14513 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14514 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14516 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14517 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14518 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14519 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14521 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14522 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14525 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14526 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14527 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14528 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14529 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14530 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14531 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14532 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14534 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14535 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14536 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14537 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14538 BUILTIN_DESC_SWAP_OPERANDS },
14539 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14540 BUILTIN_DESC_SWAP_OPERANDS },
14541 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14542 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14543 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14544 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14545 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14546 BUILTIN_DESC_SWAP_OPERANDS },
14547 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14548 BUILTIN_DESC_SWAP_OPERANDS },
14549 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14550 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14551 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14552 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14553 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14554 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14555 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14556 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14557 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14559 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14560 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14561 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14562 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14564 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14565 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14566 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14567 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14569 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14570 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14571 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14574 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14575 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14576 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14577 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14578 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14579 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14580 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14581 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14583 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14584 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14585 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14586 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14587 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14588 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14589 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14590 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14592 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14593 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14595 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14596 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14597 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14598 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14600 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14601 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14603 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14604 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14605 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14606 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14607 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14608 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14610 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14611 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14612 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14613 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14615 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14616 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14617 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14618 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14619 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14620 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14621 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14622 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14624 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14625 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14626 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14628 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14629 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14631 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14632 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14634 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14635 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14636 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14638 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14639 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14640 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14642 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14643 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14645 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14647 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14648 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14649 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14650 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14653 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14654 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14655 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14656 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14657 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14658 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
/* Table of builtins taking a single vector/scalar operand.  Each entry
   mirrors the bdesc_2arg layout consumed by the registration loop in
   ix86_init_mmx_sse_builtins: ISA feature mask, insn code, builtin name
   (0 when the name is registered separately with def_builtin), builtin
   enum code, comparison rtx code, and flags.
   NOTE(review): the extraction dropped the opening '{' and closing '};'
   delimiter lines of this initializer -- restore them before compiling.  */
14661 static const struct builtin_description bdesc_1arg[] =
  /* SSE (and 3DNow!A for the MMX-register pmovmskb).  */
14663 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14664 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14666 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14667 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14668 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
  /* SSE float <-> integer conversions (the *q variants are 64-bit only).  */
14670 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14671 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14672 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14673 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14674 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14675 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
  /* SSE2.  */
14677 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14678 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14680 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
  /* SSE2 conversions.  */
14682 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14683 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14685 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14686 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14687 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14688 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14689 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14691 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14693 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14694 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14695 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14696 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14698 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14699 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14700 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
  /* SSE3.  */
14703 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14704 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
/* Register all ix86 builtin functions -- presumably the target's
   TARGET_INIT_BUILTINS hook implementation; confirm against the
   TARGET_INIT_BUILTINS definition elsewhere in this file.
   NOTE(review): the extraction dropped this function's storage class,
   braces, and the lines between the two calls (in the original GCC
   source these calls are conditional on the active ISA target flags) --
   restore the dropped lines before compiling.  */
14708 ix86_init_builtins (void)
14711 ix86_init_mmx_sse_builtins ();
14713 ix86_init_sse_abi_builtins ();
14716 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14717 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
14720 ix86_init_mmx_sse_builtins (void)
14722 const struct builtin_description * d;
14725 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14726 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14727 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14728 tree V2DI_type_node
14729 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14730 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14731 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14732 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14733 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14734 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14735 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14737 tree pchar_type_node = build_pointer_type (char_type_node);
14738 tree pcchar_type_node = build_pointer_type (
14739 build_type_variant (char_type_node, 1, 0));
14740 tree pfloat_type_node = build_pointer_type (float_type_node);
14741 tree pcfloat_type_node = build_pointer_type (
14742 build_type_variant (float_type_node, 1, 0));
14743 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14744 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14745 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14748 tree int_ftype_v4sf_v4sf
14749 = build_function_type_list (integer_type_node,
14750 V4SF_type_node, V4SF_type_node, NULL_TREE);
14751 tree v4si_ftype_v4sf_v4sf
14752 = build_function_type_list (V4SI_type_node,
14753 V4SF_type_node, V4SF_type_node, NULL_TREE);
14754 /* MMX/SSE/integer conversions. */
14755 tree int_ftype_v4sf
14756 = build_function_type_list (integer_type_node,
14757 V4SF_type_node, NULL_TREE);
14758 tree int64_ftype_v4sf
14759 = build_function_type_list (long_long_integer_type_node,
14760 V4SF_type_node, NULL_TREE);
14761 tree int_ftype_v8qi
14762 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14763 tree v4sf_ftype_v4sf_int
14764 = build_function_type_list (V4SF_type_node,
14765 V4SF_type_node, integer_type_node, NULL_TREE);
14766 tree v4sf_ftype_v4sf_int64
14767 = build_function_type_list (V4SF_type_node,
14768 V4SF_type_node, long_long_integer_type_node,
14770 tree v4sf_ftype_v4sf_v2si
14771 = build_function_type_list (V4SF_type_node,
14772 V4SF_type_node, V2SI_type_node, NULL_TREE);
14774 /* Miscellaneous. */
14775 tree v8qi_ftype_v4hi_v4hi
14776 = build_function_type_list (V8QI_type_node,
14777 V4HI_type_node, V4HI_type_node, NULL_TREE);
14778 tree v4hi_ftype_v2si_v2si
14779 = build_function_type_list (V4HI_type_node,
14780 V2SI_type_node, V2SI_type_node, NULL_TREE);
14781 tree v4sf_ftype_v4sf_v4sf_int
14782 = build_function_type_list (V4SF_type_node,
14783 V4SF_type_node, V4SF_type_node,
14784 integer_type_node, NULL_TREE);
14785 tree v2si_ftype_v4hi_v4hi
14786 = build_function_type_list (V2SI_type_node,
14787 V4HI_type_node, V4HI_type_node, NULL_TREE);
14788 tree v4hi_ftype_v4hi_int
14789 = build_function_type_list (V4HI_type_node,
14790 V4HI_type_node, integer_type_node, NULL_TREE);
14791 tree v4hi_ftype_v4hi_di
14792 = build_function_type_list (V4HI_type_node,
14793 V4HI_type_node, long_long_unsigned_type_node,
14795 tree v2si_ftype_v2si_di
14796 = build_function_type_list (V2SI_type_node,
14797 V2SI_type_node, long_long_unsigned_type_node,
14799 tree void_ftype_void
14800 = build_function_type (void_type_node, void_list_node);
14801 tree void_ftype_unsigned
14802 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
14803 tree void_ftype_unsigned_unsigned
14804 = build_function_type_list (void_type_node, unsigned_type_node,
14805 unsigned_type_node, NULL_TREE);
14806 tree void_ftype_pcvoid_unsigned_unsigned
14807 = build_function_type_list (void_type_node, const_ptr_type_node,
14808 unsigned_type_node, unsigned_type_node,
14810 tree unsigned_ftype_void
14811 = build_function_type (unsigned_type_node, void_list_node);
14812 tree v2si_ftype_v4sf
14813 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
14814 /* Loads/stores. */
14815 tree void_ftype_v8qi_v8qi_pchar
14816 = build_function_type_list (void_type_node,
14817 V8QI_type_node, V8QI_type_node,
14818 pchar_type_node, NULL_TREE);
14819 tree v4sf_ftype_pcfloat
14820 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
14821 /* @@@ the type is bogus */
14822 tree v4sf_ftype_v4sf_pv2si
14823 = build_function_type_list (V4SF_type_node,
14824 V4SF_type_node, pv2si_type_node, NULL_TREE);
14825 tree void_ftype_pv2si_v4sf
14826 = build_function_type_list (void_type_node,
14827 pv2si_type_node, V4SF_type_node, NULL_TREE);
14828 tree void_ftype_pfloat_v4sf
14829 = build_function_type_list (void_type_node,
14830 pfloat_type_node, V4SF_type_node, NULL_TREE);
14831 tree void_ftype_pdi_di
14832 = build_function_type_list (void_type_node,
14833 pdi_type_node, long_long_unsigned_type_node,
14835 tree void_ftype_pv2di_v2di
14836 = build_function_type_list (void_type_node,
14837 pv2di_type_node, V2DI_type_node, NULL_TREE);
14838 /* Normal vector unops. */
14839 tree v4sf_ftype_v4sf
14840 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14842 /* Normal vector binops. */
14843 tree v4sf_ftype_v4sf_v4sf
14844 = build_function_type_list (V4SF_type_node,
14845 V4SF_type_node, V4SF_type_node, NULL_TREE);
14846 tree v8qi_ftype_v8qi_v8qi
14847 = build_function_type_list (V8QI_type_node,
14848 V8QI_type_node, V8QI_type_node, NULL_TREE);
14849 tree v4hi_ftype_v4hi_v4hi
14850 = build_function_type_list (V4HI_type_node,
14851 V4HI_type_node, V4HI_type_node, NULL_TREE);
14852 tree v2si_ftype_v2si_v2si
14853 = build_function_type_list (V2SI_type_node,
14854 V2SI_type_node, V2SI_type_node, NULL_TREE);
14855 tree di_ftype_di_di
14856 = build_function_type_list (long_long_unsigned_type_node,
14857 long_long_unsigned_type_node,
14858 long_long_unsigned_type_node, NULL_TREE);
14860 tree v2si_ftype_v2sf
14861 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
14862 tree v2sf_ftype_v2si
14863 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
14864 tree v2si_ftype_v2si
14865 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
14866 tree v2sf_ftype_v2sf
14867 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
14868 tree v2sf_ftype_v2sf_v2sf
14869 = build_function_type_list (V2SF_type_node,
14870 V2SF_type_node, V2SF_type_node, NULL_TREE);
14871 tree v2si_ftype_v2sf_v2sf
14872 = build_function_type_list (V2SI_type_node,
14873 V2SF_type_node, V2SF_type_node, NULL_TREE);
14874 tree pint_type_node = build_pointer_type (integer_type_node);
14875 tree pdouble_type_node = build_pointer_type (double_type_node);
14876 tree pcdouble_type_node = build_pointer_type (
14877 build_type_variant (double_type_node, 1, 0));
14878 tree int_ftype_v2df_v2df
14879 = build_function_type_list (integer_type_node,
14880 V2DF_type_node, V2DF_type_node, NULL_TREE);
14882 tree void_ftype_pcvoid
14883 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
14884 tree v4sf_ftype_v4si
14885 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
14886 tree v4si_ftype_v4sf
14887 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
14888 tree v2df_ftype_v4si
14889 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
14890 tree v4si_ftype_v2df
14891 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
14892 tree v2si_ftype_v2df
14893 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
14894 tree v4sf_ftype_v2df
14895 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
14896 tree v2df_ftype_v2si
14897 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
14898 tree v2df_ftype_v4sf
14899 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
14900 tree int_ftype_v2df
14901 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
14902 tree int64_ftype_v2df
14903 = build_function_type_list (long_long_integer_type_node,
14904 V2DF_type_node, NULL_TREE);
14905 tree v2df_ftype_v2df_int
14906 = build_function_type_list (V2DF_type_node,
14907 V2DF_type_node, integer_type_node, NULL_TREE);
14908 tree v2df_ftype_v2df_int64
14909 = build_function_type_list (V2DF_type_node,
14910 V2DF_type_node, long_long_integer_type_node,
14912 tree v4sf_ftype_v4sf_v2df
14913 = build_function_type_list (V4SF_type_node,
14914 V4SF_type_node, V2DF_type_node, NULL_TREE);
14915 tree v2df_ftype_v2df_v4sf
14916 = build_function_type_list (V2DF_type_node,
14917 V2DF_type_node, V4SF_type_node, NULL_TREE);
14918 tree v2df_ftype_v2df_v2df_int
14919 = build_function_type_list (V2DF_type_node,
14920 V2DF_type_node, V2DF_type_node,
14923 tree v2df_ftype_v2df_pcdouble
14924 = build_function_type_list (V2DF_type_node,
14925 V2DF_type_node, pcdouble_type_node, NULL_TREE);
14926 tree void_ftype_pdouble_v2df
14927 = build_function_type_list (void_type_node,
14928 pdouble_type_node, V2DF_type_node, NULL_TREE);
14929 tree void_ftype_pint_int
14930 = build_function_type_list (void_type_node,
14931 pint_type_node, integer_type_node, NULL_TREE);
14932 tree void_ftype_v16qi_v16qi_pchar
14933 = build_function_type_list (void_type_node,
14934 V16QI_type_node, V16QI_type_node,
14935 pchar_type_node, NULL_TREE);
14936 tree v2df_ftype_pcdouble
14937 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
14938 tree v2df_ftype_v2df_v2df
14939 = build_function_type_list (V2DF_type_node,
14940 V2DF_type_node, V2DF_type_node, NULL_TREE);
14941 tree v16qi_ftype_v16qi_v16qi
14942 = build_function_type_list (V16QI_type_node,
14943 V16QI_type_node, V16QI_type_node, NULL_TREE);
14944 tree v8hi_ftype_v8hi_v8hi
14945 = build_function_type_list (V8HI_type_node,
14946 V8HI_type_node, V8HI_type_node, NULL_TREE);
14947 tree v4si_ftype_v4si_v4si
14948 = build_function_type_list (V4SI_type_node,
14949 V4SI_type_node, V4SI_type_node, NULL_TREE);
14950 tree v2di_ftype_v2di_v2di
14951 = build_function_type_list (V2DI_type_node,
14952 V2DI_type_node, V2DI_type_node, NULL_TREE);
14953 tree v2di_ftype_v2df_v2df
14954 = build_function_type_list (V2DI_type_node,
14955 V2DF_type_node, V2DF_type_node, NULL_TREE);
14956 tree v2df_ftype_v2df
14957 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14958 tree v2di_ftype_v2di_int
14959 = build_function_type_list (V2DI_type_node,
14960 V2DI_type_node, integer_type_node, NULL_TREE);
14961 tree v4si_ftype_v4si_int
14962 = build_function_type_list (V4SI_type_node,
14963 V4SI_type_node, integer_type_node, NULL_TREE);
14964 tree v8hi_ftype_v8hi_int
14965 = build_function_type_list (V8HI_type_node,
14966 V8HI_type_node, integer_type_node, NULL_TREE);
14967 tree v8hi_ftype_v8hi_v2di
14968 = build_function_type_list (V8HI_type_node,
14969 V8HI_type_node, V2DI_type_node, NULL_TREE);
14970 tree v4si_ftype_v4si_v2di
14971 = build_function_type_list (V4SI_type_node,
14972 V4SI_type_node, V2DI_type_node, NULL_TREE);
14973 tree v4si_ftype_v8hi_v8hi
14974 = build_function_type_list (V4SI_type_node,
14975 V8HI_type_node, V8HI_type_node, NULL_TREE);
14976 tree di_ftype_v8qi_v8qi
14977 = build_function_type_list (long_long_unsigned_type_node,
14978 V8QI_type_node, V8QI_type_node, NULL_TREE);
14979 tree di_ftype_v2si_v2si
14980 = build_function_type_list (long_long_unsigned_type_node,
14981 V2SI_type_node, V2SI_type_node, NULL_TREE);
14982 tree v2di_ftype_v16qi_v16qi
14983 = build_function_type_list (V2DI_type_node,
14984 V16QI_type_node, V16QI_type_node, NULL_TREE);
14985 tree v2di_ftype_v4si_v4si
14986 = build_function_type_list (V2DI_type_node,
14987 V4SI_type_node, V4SI_type_node, NULL_TREE);
14988 tree int_ftype_v16qi
14989 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
14990 tree v16qi_ftype_pcchar
14991 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
14992 tree void_ftype_pchar_v16qi
14993 = build_function_type_list (void_type_node,
14994 pchar_type_node, V16QI_type_node, NULL_TREE);
14997 tree float128_type;
15000 /* The __float80 type. */
15001 if (TYPE_MODE (long_double_type_node) == XFmode)
15002 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15006 /* The __float80 type. */
15007 float80_type = make_node (REAL_TYPE);
15008 TYPE_PRECISION (float80_type) = 80;
15009 layout_type (float80_type);
15010 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15015 float128_type = make_node (REAL_TYPE);
15016 TYPE_PRECISION (float128_type) = 128;
15017 layout_type (float128_type);
15018 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15021 /* Add all builtins that are more or less simple operations on two
15023 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15025 /* Use one of the operands; the target can have a different mode for
15026 mask-generating compares. */
15027 enum machine_mode mode;
15032 mode = insn_data[d->icode].operand[1].mode;
15037 type = v16qi_ftype_v16qi_v16qi;
15040 type = v8hi_ftype_v8hi_v8hi;
15043 type = v4si_ftype_v4si_v4si;
15046 type = v2di_ftype_v2di_v2di;
15049 type = v2df_ftype_v2df_v2df;
15052 type = v4sf_ftype_v4sf_v4sf;
15055 type = v8qi_ftype_v8qi_v8qi;
15058 type = v4hi_ftype_v4hi_v4hi;
15061 type = v2si_ftype_v2si_v2si;
15064 type = di_ftype_di_di;
15068 gcc_unreachable ();
15071 /* Override for comparisons. */
15072 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15073 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15074 type = v4si_ftype_v4sf_v4sf;
15076 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15077 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15078 type = v2di_ftype_v2df_v2df;
15080 def_builtin (d->mask, d->name, type, d->code);
15083 /* Add the remaining MMX insns with somewhat more complicated types. */
15084 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15085 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15086 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15087 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15089 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15090 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15091 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15093 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15094 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15096 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15097 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15099 /* comi/ucomi insns. */
15100 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15101 if (d->mask == MASK_SSE2)
15102 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15104 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15106 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15107 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15108 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15110 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15111 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15112 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15113 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15114 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15115 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15116 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15117 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15118 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15119 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15120 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15122 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15124 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15125 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15127 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15128 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15129 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15130 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15132 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15133 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15134 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15135 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15137 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15139 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15141 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15142 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15143 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15144 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15145 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15146 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15148 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15150 /* Original 3DNow! */
15151 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15152 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15153 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15154 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15155 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15156 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15157 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15158 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15159 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15160 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15161 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15162 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15163 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15164 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15165 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15166 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15167 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15168 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15169 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15170 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15172 /* 3DNow! extension as used in the Athlon CPU. */
15173 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15174 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15175 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15176 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15177 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15178 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15181 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15183 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15184 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15186 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15187 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15189 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15190 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15191 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15192 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15193 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15195 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15196 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15197 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15198 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15200 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15201 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15203 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15205 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15206 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15208 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15209 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15210 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15211 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15212 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15214 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15216 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15217 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15218 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15219 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15221 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15222 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15223 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15225 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15226 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15227 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15228 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15230 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15231 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15232 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15234 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15235 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15237 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15238 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15240 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15241 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15242 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15244 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15245 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15246 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15248 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15249 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15251 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15252 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15253 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15254 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15256 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15257 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15258 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15259 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15261 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15262 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15264 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15266 /* Prescott New Instructions. */
15267 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15268 void_ftype_pcvoid_unsigned_unsigned,
15269 IX86_BUILTIN_MONITOR);
15270 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15271 void_ftype_unsigned_unsigned,
15272 IX86_BUILTIN_MWAIT);
15273 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15275 IX86_BUILTIN_MOVSHDUP);
15276 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15278 IX86_BUILTIN_MOVSLDUP);
15279 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15280 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15282 /* Access to the vec_init patterns. */
15283 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15284 integer_type_node, NULL_TREE);
15285 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15286 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15288 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15289 short_integer_type_node,
15290 short_integer_type_node,
15291 short_integer_type_node, NULL_TREE);
15292 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15293 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15295 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15296 char_type_node, char_type_node,
15297 char_type_node, char_type_node,
15298 char_type_node, char_type_node,
15299 char_type_node, NULL_TREE);
15300 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15301 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15303 /* Access to the vec_extract patterns. */
15304 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15305 integer_type_node, NULL_TREE);
15306 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15307 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15309 ftype = build_function_type_list (long_long_integer_type_node,
15310 V2DI_type_node, integer_type_node,
15312 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15313 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15315 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15316 integer_type_node, NULL_TREE);
15317 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15318 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15320 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15321 integer_type_node, NULL_TREE);
15322 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15323 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15325 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15326 integer_type_node, NULL_TREE);
15327 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15328 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15330 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15331 integer_type_node, NULL_TREE);
15332 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15333 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15335 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15336 integer_type_node, NULL_TREE);
15337 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15338 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15340 /* Access to the vec_set patterns. */
15341 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15343 integer_type_node, NULL_TREE);
15344 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15345 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15347 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15349 integer_type_node, NULL_TREE);
15350 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15351 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15355 /* Set up all the SSE ABI builtins that we may use to override
15356 the normal builtins. */
/* NOTE(review): the storage-class/return-type line and several body lines
   (including the closing brace and the matching #undef) are not visible in
   this chunk; comments below describe only the visible code.  */
15358 ix86_init_sse_abi_builtins (void)
15362 /* Bail out in case the template definitions are not available. */
15363 if (! built_in_decls [BUILT_IN_SIN]
15364 || ! built_in_decls [BUILT_IN_SINF]
15365 || ! built_in_decls [BUILT_IN_ATAN2]
15366 || ! built_in_decls [BUILT_IN_ATAN2F])
15369 /* Build the function types as variants of the existing ones. */
/* flt: unary float function type (copied from sinf); flt2: binary float
   function type (copied from atan2f).  Both are tagged with the
   "sseregparm" attribute so arguments are passed in SSE registers.  */
15370 flt = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_SINF]));
15371 TYPE_ATTRIBUTES (flt)
15372 = tree_cons (get_identifier ("sseregparm"),
15373 NULL_TREE, TYPE_ATTRIBUTES (flt));
15374 flt2 = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_ATAN2F]));
15375 TYPE_ATTRIBUTES (flt2)
15376 = tree_cons (get_identifier ("sseregparm"),
15377 NULL_TREE, TYPE_ATTRIBUTES (flt2));
/* Local helper macro: registers "__builtin_sse2_<name>" as a variant of
   the standard builtin BUILT_IN_<capname>, backed by the library symbol
   "__libm_sse2_<name>".  Shadows the file-level def_builtin function for
   the remainder of this function (presumably #undef'd after — the #undef
   is outside the visible lines).  */
15379 #define def_builtin(capname, name, type) \
15380 ix86_builtin_function_variants [BUILT_IN_ ## capname] \
15381 = lang_hooks.builtin_function ("__builtin_sse2_" # name, type, \
15382 IX86_BUILTIN_SSE2_ ## capname, \
15384 "__libm_sse2_" # name, NULL_TREE)
15386 def_builtin (ACOSF, acosf, flt);
15387 def_builtin (ASINF, asinf, flt);
15388 def_builtin (ATANF, atanf, flt);
15389 def_builtin (ATAN2F, atan2f, flt2);
15390 def_builtin (COSF, cosf, flt);
15391 def_builtin (EXPF, expf, flt);
15392 def_builtin (LOG10F, log10f, flt);
15393 def_builtin (LOGF, logf, flt);
15394 def_builtin (SINF, sinf, flt);
15395 def_builtin (TANF, tanf, flt);
15400 /* Errors in the source file can cause expand_expr to return const0_rtx
15401 where we expect a vector. To avoid crashing, use one of the vector
15402 clear instructions. */
/* NOTE(review): return type line and trailing "return x;" are not visible
   in this chunk.  */
15404 safe_vector_operand (rtx x, enum machine_mode mode)
15406 if (x == const0_rtx)
/* Replace the scalar zero with an all-zero vector constant of MODE.  */
15407 x = CONST0_RTX (mode);
15411 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expands a two-operand builtin via insn ICODE: evaluates both arguments,
   coerces them to the modes the insn pattern expects, and emits the insn
   with the result in TARGET (or a fresh pseudo).  NOTE(review): several
   lines — the return-type line, brace lines, the xops[] setup, and the
   final emit/return — are not visible in this chunk.  */
15414 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15417 tree arg0 = TREE_VALUE (arglist);
15418 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15419 rtx op0 = expand_normal (arg0);
15420 rtx op1 = expand_normal (arg1);
/* Modes required by the insn pattern: operand 0 is the output.  */
15421 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15422 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15423 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector after a source error.  */
15425 if (VECTOR_MODE_P (mode0))
15426 op0 = safe_vector_operand (op0, mode0);
15427 if (VECTOR_MODE_P (mode1))
15428 op1 = safe_vector_operand (op1, mode1);
15430 if (optimize || !target
15431 || GET_MODE (target) != tmode
15432 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15433 target = gen_reg_rtx (tmode);
/* SImode shift count for a TImode shift pattern: load the count into a
   V4SImode register and reinterpret it as TImode.  */
15435 if (GET_MODE (op1) == SImode && mode1 == TImode)
15437 rtx x = gen_reg_rtx (V4SImode);
15438 emit_insn (gen_sse2_loadd (x, op1));
15439 op1 = gen_lowpart (TImode, x);
15442 /* The insn must want input operands in the same modes as the
15444 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15445 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15447 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15448 op0 = copy_to_mode_reg (mode0, op0);
15449 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15450 op1 = copy_to_mode_reg (mode1, op1);
15452 /* ??? Using ix86_fixup_binary_operands is problematic when
15453 we've got mismatched modes. Fake it. */
/* Homogeneous case: let the common fixup routine canonicalize operands.
   (The xops[] array it reads is initialized on lines not visible here.)  */
15459 if (tmode == mode0 && tmode == mode1)
15461 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
/* Mixed-mode case: force everything into registers ourselves.  */
15465 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15467 op0 = force_reg (mode0, op0);
15468 op1 = force_reg (mode1, op1);
15469 target = gen_reg_rtx (tmode);
15472 pat = GEN_FCN (icode) (target, op0, op1);
15479 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Arg0 is a pointer (store destination), arg1 is the value to store.
   NOTE(review): return-type line, braces, and the final emit/return are
   not visible in this chunk.  */
15482 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15485 tree arg0 = TREE_VALUE (arglist);
15486 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15487 rtx op0 = expand_normal (arg0);
15488 rtx op1 = expand_normal (arg1);
15489 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15490 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15492 if (VECTOR_MODE_P (mode1))
15493 op1 = safe_vector_operand (op1, mode1);
/* Turn the pointer into a MEM of the destination mode; source goes in a reg.  */
15495 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15496 op1 = copy_to_mode_reg (mode1, op1);
15498 pat = GEN_FCN (icode) (op0, op1);
15504 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* DO_LOAD nonzero means arg0 is a pointer and the operand is loaded from
   memory rather than used directly.  NOTE(review): the return-type line,
   the if/else around the DO_LOAD branch, and the final emit/return are
   not visible in this chunk.  */
15507 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15508 rtx target, int do_load)
15511 tree arg0 = TREE_VALUE (arglist);
15512 rtx op0 = expand_normal (arg0);
15513 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15514 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15516 if (optimize || !target
15517 || GET_MODE (target) != tmode
15518 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15519 target = gen_reg_rtx (tmode);
/* DO_LOAD path: dereference the pointer argument.  */
15521 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15524 if (VECTOR_MODE_P (mode0))
15525 op0 = safe_vector_operand (op0, mode0);
15527 if ((optimize && !register_operand (op0, mode0))
15528 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15529 op0 = copy_to_mode_reg (mode0, op0);
15532 pat = GEN_FCN (icode) (target, op0);
15539 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15540 sqrtss, rsqrtss, rcpss. */
/* These scalar insn patterns take the same vector twice: the operation is
   applied to the low element and the remaining elements pass through.
   NOTE(review): the line assigning op1 (presumably op1 = op0) and the
   final emit/return are not visible in this chunk.  */
15543 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15546 tree arg0 = TREE_VALUE (arglist);
15547 rtx op1, op0 = expand_normal (arg0);
15548 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15549 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15551 if (optimize || !target
15552 || GET_MODE (target) != tmode
15553 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15554 target = gen_reg_rtx (tmode);
15556 if (VECTOR_MODE_P (mode0))
15557 op0 = safe_vector_operand (op0, mode0);
15559 if ((optimize && !register_operand (op0, mode0))
15560 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15561 op0 = copy_to_mode_reg (mode0, op0);
/* op1 mirrors op0 (set on a line not visible here); both inputs share MODE0.  */
15564 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15565 op1 = copy_to_mode_reg (mode0, op1);
15567 pat = GEN_FCN (icode) (target, op0, op1);
15574 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands a full-width SSE compare described by D (icode + rtx comparison
   code + flags) into a mask-producing insn.  NOTE(review): the return-type
   line, parameter continuation of the signature, the body of the
   swap-operands branch, and the final emit/return are not visible here.  */
15577 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15581 tree arg0 = TREE_VALUE (arglist);
15582 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15583 rtx op0 = expand_normal (arg0);
15584 rtx op1 = expand_normal (arg1);
15586 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15587 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15588 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15589 enum rtx_code comparison = d->comparison;
15591 if (VECTOR_MODE_P (mode0))
15592 op0 = safe_vector_operand (op0, mode0);
15593 if (VECTOR_MODE_P (mode1))
15594 op1 = safe_vector_operand (op1, mode1);
15596 /* Swap operands if we have a comparison that isn't available in
15598 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Copy op1 through a fresh register before swapping (the rest of the swap
   is on lines not visible in this chunk).  */
15600 rtx tmp = gen_reg_rtx (mode1);
15601 emit_move_insn (tmp, op1);
15606 if (optimize || !target
15607 || GET_MODE (target) != tmode
15608 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15609 target = gen_reg_rtx (tmode);
15611 if ((optimize && !register_operand (op0, mode0))
15612 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15613 op0 = copy_to_mode_reg (mode0, op0);
15614 if ((optimize && !register_operand (op1, mode1))
15615 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15616 op1 = copy_to_mode_reg (mode1, op1);
/* The insn pattern takes the comparison rtx itself as a third operand.  */
15618 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15619 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15626 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands a comi/ucomi-style scalar compare: the insn sets the flags, and
   the 0/1 result is materialized by a setcc into the low byte of an SImode
   pseudo.  NOTE(review): the return-type line, signature continuation,
   swap-branch body, and the emit of PAT are not visible in this chunk.  */
15629 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15633 tree arg0 = TREE_VALUE (arglist);
15634 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15635 rtx op0 = expand_normal (arg0);
15636 rtx op1 = expand_normal (arg1);
15638 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15639 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15640 enum rtx_code comparison = d->comparison;
15642 if (VECTOR_MODE_P (mode0))
15643 op0 = safe_vector_operand (op0, mode0);
15644 if (VECTOR_MODE_P (mode1))
15645 op1 = safe_vector_operand (op1, mode1);
15647 /* Swap operands if we have a comparison that isn't available in
15649 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Result is built in a zeroed SImode pseudo; only its low QImode part is
   written by the setcc below, so the upper bits stay zero.  */
15656 target = gen_reg_rtx (SImode);
15657 emit_move_insn (target, const0_rtx);
15658 target = gen_rtx_SUBREG (QImode, target, 0);
15660 if ((optimize && !register_operand (op0, mode0))
15661 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15662 op0 = copy_to_mode_reg (mode0, op0);
15663 if ((optimize && !register_operand (op1, mode1))
15664 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15665 op1 = copy_to_mode_reg (mode1, op1);
15667 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15668 pat = GEN_FCN (d->icode) (op0, op1);
/* setcc: store the comparison outcome into the low byte only.  */
15672 emit_insn (gen_rtx_SET (VOIDmode,
15673 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15674 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode pseudo underneath the QImode subreg.  */
15678 return SUBREG_REG (target);
15681 /* Return the integer constant in ARG. Constrain it to be in the range
15682 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): the return-type line, the error-path return value, and the
   success "return elt;" are not visible in this chunk.  */
15685 get_element_number (tree vec_type, tree arg)
15687 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constants and constants outside [0, nunits-1].  */
15689 if (!host_integerp (arg, 1)
15690 || (elt = tree_low_cst (arg, 1), elt > max))
15692 error ("selector must be an integer constant in the range 0..%wi", max);
15699 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15700 ix86_expand_vector_init. We DO have language-level syntax for this, in
15701 the form of (type){ init-list }. Except that since we can't place emms
15702 instructions from inside the compiler, we can't allow the use of MMX
15703 registers unless the user explicitly asks for it. So we do *not* define
15704 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15705 we have builtins invoked by mmintrin.h that gives us license to emit
15706 these sorts of instructions. */
/* NOTE(review): return-type line, braces and trailing "return target;" are
   not visible in this chunk.  */
15709 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15711 enum machine_mode tmode = TYPE_MODE (type);
15712 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15713 int i, n_elt = GET_MODE_NUNITS (tmode);
15714 rtvec v = rtvec_alloc (n_elt);
15716 gcc_assert (VECTOR_MODE_P (tmode));
/* Expand each argument and narrow it to the vector's element mode.  */
15718 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15720 rtx x = expand_normal (TREE_VALUE (arglist));
15721 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
/* The argument count must exactly match the element count.  */
15724 gcc_assert (arglist == NULL);
15726 if (!target || !register_operand (target, tmode))
15727 target = gen_reg_rtx (tmode);
15729 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15733 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15734 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15735 had a language-level syntax for referencing vector elements. */
/* NOTE(review): return-type line, some local declarations, braces and the
   trailing "return target;" are not visible in this chunk.  */
15738 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15740 enum machine_mode tmode, mode0;
15745 arg0 = TREE_VALUE (arglist);
15746 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15748 op0 = expand_normal (arg0);
/* arg1 must be a compile-time constant selector; errors are diagnosed
   inside get_element_number.  */
15749 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode: element mode; mode0: whole-vector mode.  */
15751 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15752 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15753 gcc_assert (VECTOR_MODE_P (mode0));
15755 op0 = force_reg (mode0, op0);
15757 if (optimize || !target || !register_operand (target, tmode))
15758 target = gen_reg_rtx (tmode);
15760 ix86_expand_vector_extract (true, target, op0, elt);
15765 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15766 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15767 a language-level syntax for referencing vector elements. */
/* NOTE(review): return-type line, some local declarations, braces and the
   trailing "return op0;" are not visible in this chunk.  */
15770 ix86_expand_vec_set_builtin (tree arglist)
15772 enum machine_mode tmode, mode1;
15773 tree arg0, arg1, arg2;
15777 arg0 = TREE_VALUE (arglist);
15778 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15779 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
/* tmode: whole-vector mode; mode1: element mode.  */
15781 tmode = TYPE_MODE (TREE_TYPE (arg0));
15782 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15783 gcc_assert (VECTOR_MODE_P (tmode));
15785 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15786 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
/* arg2 must be a compile-time constant selector; errors are diagnosed
   inside get_element_number.  */
15787 elt = get_element_number (TREE_TYPE (arg0), arg2);
15789 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15790 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15792 op0 = force_reg (tmode, op0);
15793 op1 = force_reg (mode1, op1);
15795 ix86_expand_vector_set (true, op0, op1, elt);
15800 /* Expand an expression EXP that calls a built-in function,
15801 with result going to TARGET if that's convenient
15802 (and in mode MODE if that's convenient).
15803 SUBTARGET may be used as the target for computing one of EXP's operands.
15804 IGNORE is nonzero if the value is to be ignored. */
15807 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15808 enum machine_mode mode ATTRIBUTE_UNUSED,
15809 int ignore ATTRIBUTE_UNUSED)
15811 const struct builtin_description *d;
15813 enum insn_code icode;
15814 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
15815 tree arglist = TREE_OPERAND (exp, 1);
15816 tree arg0, arg1, arg2;
15817 rtx op0, op1, op2, pat;
15818 enum machine_mode tmode, mode0, mode1, mode2;
15819 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15823 case IX86_BUILTIN_EMMS:
15824 emit_insn (gen_mmx_emms ());
15827 case IX86_BUILTIN_SFENCE:
15828 emit_insn (gen_sse_sfence ());
15831 case IX86_BUILTIN_MASKMOVQ:
15832 case IX86_BUILTIN_MASKMOVDQU:
15833 icode = (fcode == IX86_BUILTIN_MASKMOVQ
15834 ? CODE_FOR_mmx_maskmovq
15835 : CODE_FOR_sse2_maskmovdqu);
15836 /* Note the arg order is different from the operand order. */
15837 arg1 = TREE_VALUE (arglist);
15838 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
15839 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15840 op0 = expand_normal (arg0);
15841 op1 = expand_normal (arg1);
15842 op2 = expand_normal (arg2);
15843 mode0 = insn_data[icode].operand[0].mode;
15844 mode1 = insn_data[icode].operand[1].mode;
15845 mode2 = insn_data[icode].operand[2].mode;
15847 op0 = force_reg (Pmode, op0);
15848 op0 = gen_rtx_MEM (mode1, op0);
15850 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15851 op0 = copy_to_mode_reg (mode0, op0);
15852 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15853 op1 = copy_to_mode_reg (mode1, op1);
15854 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
15855 op2 = copy_to_mode_reg (mode2, op2);
15856 pat = GEN_FCN (icode) (op0, op1, op2);
15862 case IX86_BUILTIN_SQRTSS:
15863 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
15864 case IX86_BUILTIN_RSQRTSS:
15865 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
15866 case IX86_BUILTIN_RCPSS:
15867 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
15869 case IX86_BUILTIN_LOADUPS:
15870 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
15872 case IX86_BUILTIN_STOREUPS:
15873 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
15875 case IX86_BUILTIN_LOADHPS:
15876 case IX86_BUILTIN_LOADLPS:
15877 case IX86_BUILTIN_LOADHPD:
15878 case IX86_BUILTIN_LOADLPD:
15879 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
15880 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
15881 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
15882 : CODE_FOR_sse2_loadlpd);
15883 arg0 = TREE_VALUE (arglist);
15884 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15885 op0 = expand_normal (arg0);
15886 op1 = expand_normal (arg1);
15887 tmode = insn_data[icode].operand[0].mode;
15888 mode0 = insn_data[icode].operand[1].mode;
15889 mode1 = insn_data[icode].operand[2].mode;
15891 op0 = force_reg (mode0, op0);
15892 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
15893 if (optimize || target == 0
15894 || GET_MODE (target) != tmode
15895 || !register_operand (target, tmode))
15896 target = gen_reg_rtx (tmode);
15897 pat = GEN_FCN (icode) (target, op0, op1);
15903 case IX86_BUILTIN_STOREHPS:
15904 case IX86_BUILTIN_STORELPS:
15905 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
15906 : CODE_FOR_sse_storelps);
15907 arg0 = TREE_VALUE (arglist);
15908 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15909 op0 = expand_normal (arg0);
15910 op1 = expand_normal (arg1);
15911 mode0 = insn_data[icode].operand[0].mode;
15912 mode1 = insn_data[icode].operand[1].mode;
15914 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15915 op1 = force_reg (mode1, op1);
15917 pat = GEN_FCN (icode) (op0, op1);
15923 case IX86_BUILTIN_MOVNTPS:
15924 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
15925 case IX86_BUILTIN_MOVNTQ:
15926 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
15928 case IX86_BUILTIN_LDMXCSR:
15929 op0 = expand_normal (TREE_VALUE (arglist));
15930 target = assign_386_stack_local (SImode, SLOT_TEMP);
15931 emit_move_insn (target, op0);
15932 emit_insn (gen_sse_ldmxcsr (target));
15935 case IX86_BUILTIN_STMXCSR:
15936 target = assign_386_stack_local (SImode, SLOT_TEMP);
15937 emit_insn (gen_sse_stmxcsr (target));
15938 return copy_to_mode_reg (SImode, target);
15940 case IX86_BUILTIN_SHUFPS:
15941 case IX86_BUILTIN_SHUFPD:
15942 icode = (fcode == IX86_BUILTIN_SHUFPS
15943 ? CODE_FOR_sse_shufps
15944 : CODE_FOR_sse2_shufpd);
15945 arg0 = TREE_VALUE (arglist);
15946 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15947 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15948 op0 = expand_normal (arg0);
15949 op1 = expand_normal (arg1);
15950 op2 = expand_normal (arg2);
15951 tmode = insn_data[icode].operand[0].mode;
15952 mode0 = insn_data[icode].operand[1].mode;
15953 mode1 = insn_data[icode].operand[2].mode;
15954 mode2 = insn_data[icode].operand[3].mode;
15956 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15957 op0 = copy_to_mode_reg (mode0, op0);
15958 if ((optimize && !register_operand (op1, mode1))
15959 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
15960 op1 = copy_to_mode_reg (mode1, op1);
15961 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15963 /* @@@ better error message */
15964 error ("mask must be an immediate");
15965 return gen_reg_rtx (tmode);
15967 if (optimize || target == 0
15968 || GET_MODE (target) != tmode
15969 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15970 target = gen_reg_rtx (tmode);
15971 pat = GEN_FCN (icode) (target, op0, op1, op2);
15977 case IX86_BUILTIN_PSHUFW:
15978 case IX86_BUILTIN_PSHUFD:
15979 case IX86_BUILTIN_PSHUFHW:
15980 case IX86_BUILTIN_PSHUFLW:
15981 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
15982 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
15983 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
15984 : CODE_FOR_mmx_pshufw);
15985 arg0 = TREE_VALUE (arglist);
15986 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15987 op0 = expand_normal (arg0);
15988 op1 = expand_normal (arg1);
15989 tmode = insn_data[icode].operand[0].mode;
15990 mode1 = insn_data[icode].operand[1].mode;
15991 mode2 = insn_data[icode].operand[2].mode;
15993 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15994 op0 = copy_to_mode_reg (mode1, op0);
15995 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15997 /* @@@ better error message */
15998 error ("mask must be an immediate");
16002 || GET_MODE (target) != tmode
16003 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16004 target = gen_reg_rtx (tmode);
16005 pat = GEN_FCN (icode) (target, op0, op1);
16011 case IX86_BUILTIN_PSLLDQI128:
16012 case IX86_BUILTIN_PSRLDQI128:
16013 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16014 : CODE_FOR_sse2_lshrti3);
16015 arg0 = TREE_VALUE (arglist);
16016 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16017 op0 = expand_normal (arg0);
16018 op1 = expand_normal (arg1);
16019 tmode = insn_data[icode].operand[0].mode;
16020 mode1 = insn_data[icode].operand[1].mode;
16021 mode2 = insn_data[icode].operand[2].mode;
16023 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16025 op0 = copy_to_reg (op0);
16026 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16028 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16030 error ("shift must be an immediate");
16033 target = gen_reg_rtx (V2DImode);
16034 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16040 case IX86_BUILTIN_FEMMS:
16041 emit_insn (gen_mmx_femms ());
16044 case IX86_BUILTIN_PAVGUSB:
16045 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16047 case IX86_BUILTIN_PF2ID:
16048 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16050 case IX86_BUILTIN_PFACC:
16051 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16053 case IX86_BUILTIN_PFADD:
16054 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16056 case IX86_BUILTIN_PFCMPEQ:
16057 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16059 case IX86_BUILTIN_PFCMPGE:
16060 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16062 case IX86_BUILTIN_PFCMPGT:
16063 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16065 case IX86_BUILTIN_PFMAX:
16066 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16068 case IX86_BUILTIN_PFMIN:
16069 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16071 case IX86_BUILTIN_PFMUL:
16072 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16074 case IX86_BUILTIN_PFRCP:
16075 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16077 case IX86_BUILTIN_PFRCPIT1:
16078 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16080 case IX86_BUILTIN_PFRCPIT2:
16081 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16083 case IX86_BUILTIN_PFRSQIT1:
16084 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16086 case IX86_BUILTIN_PFRSQRT:
16087 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16089 case IX86_BUILTIN_PFSUB:
16090 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16092 case IX86_BUILTIN_PFSUBR:
16093 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16095 case IX86_BUILTIN_PI2FD:
16096 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16098 case IX86_BUILTIN_PMULHRW:
16099 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16101 case IX86_BUILTIN_PF2IW:
16102 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16104 case IX86_BUILTIN_PFNACC:
16105 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16107 case IX86_BUILTIN_PFPNACC:
16108 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16110 case IX86_BUILTIN_PI2FW:
16111 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16113 case IX86_BUILTIN_PSWAPDSI:
16114 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16116 case IX86_BUILTIN_PSWAPDSF:
16117 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16119 case IX86_BUILTIN_SQRTSD:
16120 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16121 case IX86_BUILTIN_LOADUPD:
16122 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16123 case IX86_BUILTIN_STOREUPD:
16124 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16126 case IX86_BUILTIN_MFENCE:
16127 emit_insn (gen_sse2_mfence ());
16129 case IX86_BUILTIN_LFENCE:
16130 emit_insn (gen_sse2_lfence ());
16133 case IX86_BUILTIN_CLFLUSH:
16134 arg0 = TREE_VALUE (arglist);
16135 op0 = expand_normal (arg0);
16136 icode = CODE_FOR_sse2_clflush;
16137 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16138 op0 = copy_to_mode_reg (Pmode, op0);
16140 emit_insn (gen_sse2_clflush (op0));
16143 case IX86_BUILTIN_MOVNTPD:
16144 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16145 case IX86_BUILTIN_MOVNTDQ:
16146 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16147 case IX86_BUILTIN_MOVNTI:
16148 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16150 case IX86_BUILTIN_LOADDQU:
16151 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16152 case IX86_BUILTIN_STOREDQU:
16153 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16155 case IX86_BUILTIN_MONITOR:
16156 arg0 = TREE_VALUE (arglist);
16157 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16158 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16159 op0 = expand_normal (arg0);
16160 op1 = expand_normal (arg1);
16161 op2 = expand_normal (arg2);
16163 op0 = copy_to_mode_reg (SImode, op0);
16165 op1 = copy_to_mode_reg (SImode, op1);
16167 op2 = copy_to_mode_reg (SImode, op2);
16168 emit_insn (gen_sse3_monitor (op0, op1, op2));
16171 case IX86_BUILTIN_MWAIT:
16172 arg0 = TREE_VALUE (arglist);
16173 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16174 op0 = expand_normal (arg0);
16175 op1 = expand_normal (arg1);
16177 op0 = copy_to_mode_reg (SImode, op0);
16179 op1 = copy_to_mode_reg (SImode, op1);
16180 emit_insn (gen_sse3_mwait (op0, op1));
16183 case IX86_BUILTIN_LDDQU:
16184 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16187 case IX86_BUILTIN_VEC_INIT_V2SI:
16188 case IX86_BUILTIN_VEC_INIT_V4HI:
16189 case IX86_BUILTIN_VEC_INIT_V8QI:
16190 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16192 case IX86_BUILTIN_VEC_EXT_V2DF:
16193 case IX86_BUILTIN_VEC_EXT_V2DI:
16194 case IX86_BUILTIN_VEC_EXT_V4SF:
16195 case IX86_BUILTIN_VEC_EXT_V4SI:
16196 case IX86_BUILTIN_VEC_EXT_V8HI:
16197 case IX86_BUILTIN_VEC_EXT_V2SI:
16198 case IX86_BUILTIN_VEC_EXT_V4HI:
16199 return ix86_expand_vec_ext_builtin (arglist, target);
16201 case IX86_BUILTIN_VEC_SET_V8HI:
16202 case IX86_BUILTIN_VEC_SET_V4HI:
16203 return ix86_expand_vec_set_builtin (arglist);
16209 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16210 if (d->code == fcode)
16212 /* Compares are treated specially. */
16213 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16214 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16215 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16216 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16217 return ix86_expand_sse_compare (d, arglist, target);
16219 return ix86_expand_binop_builtin (d->icode, arglist, target);
16222 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16223 if (d->code == fcode)
16224 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16226 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16227 if (d->code == fcode)
16228 return ix86_expand_sse_comi (d, arglist, target);
16230 gcc_unreachable ();
16233 /* Expand an expression EXP that calls a built-in library function,
16234 with result going to TARGET if that's convenient
16235 (and in mode MODE if that's convenient).
16236 SUBTARGET may be used as the target for computing one of EXP's operands.
16237 IGNORE is nonzero if the value is to be ignored. */
16240 ix86_expand_library_builtin (tree exp, rtx target,
16241 rtx subtarget ATTRIBUTE_UNUSED,
16242 enum machine_mode mode ATTRIBUTE_UNUSED,
16245 enum built_in_function fncode;
16246 tree fndecl, newfn, call;
/* Redirection only applies when the SSE2 libm ABI variants are enabled.  */
16248 /* Try expanding builtin math functions to the SSE2 ABI variants. */
16249 if (!TARGET_SSELIBM)
16252 fncode = builtin_mathfn_code (exp);
/* No SSE variant registered for this math builtin -> nothing to do.  */
16253 if (!ix86_builtin_function_variants [(int)fncode])
16256 fndecl = get_callee_fndecl (exp);
/* NOTE(review): DECL_RTL_SET_P check presumably skips functions already
   expanded/addressed -- confirm against the elided branch body.  */
16257 if (DECL_RTL_SET_P (fndecl))
16260 /* Build the redirected call and expand it. */
16261 newfn = ix86_builtin_function_variants [(int)fncode];
16262 call = build_function_call_expr (newfn, TREE_OPERAND (exp, 1));
16263 return expand_call (call, target, ignore);
16266 /* Store OPERAND to the memory after reload is completed. This means
16267 that we can't easily use assign_stack_local. */
16269 ix86_force_to_memory (enum machine_mode mode, rtx operand)
/* Only valid after reload: the slot is addressed relative to the hard
   stack pointer.  */
16273 gcc_assert (reload_completed);
/* With a red zone we may store below the stack pointer without moving it.  */
16274 if (TARGET_RED_ZONE)
16276 result = gen_rtx_MEM (mode,
16277 gen_rtx_PLUS (Pmode,
16279 GEN_INT (-RED_ZONE_SIZE)));
16280 emit_move_insn (result, operand);
/* 64-bit, no red zone: push the value after widening it to DImode.  */
16282 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16288 operand = gen_lowpart (DImode, operand);
16292 gen_rtx_SET (VOIDmode,
16293 gen_rtx_MEM (DImode,
16294 gen_rtx_PRE_DEC (DImode,
16295 stack_pointer_rtx)),
16299 gcc_unreachable ();
16301 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit: a DImode value is split and pushed as two SImode words.  */
16310 split_di (&operand, 1, operands, operands + 1);
16312 gen_rtx_SET (VOIDmode,
16313 gen_rtx_MEM (SImode,
16314 gen_rtx_PRE_DEC (Pmode,
16315 stack_pointer_rtx)),
16318 gen_rtx_SET (VOIDmode,
16319 gen_rtx_MEM (SImode,
16320 gen_rtx_PRE_DEC (Pmode,
16321 stack_pointer_rtx)),
16326 /* Store HImodes as SImodes. */
16327 operand = gen_lowpart (SImode, operand);
16331 gen_rtx_SET (VOIDmode,
16332 gen_rtx_MEM (GET_MODE (operand),
16333 gen_rtx_PRE_DEC (SImode,
16334 stack_pointer_rtx)),
16338 gcc_unreachable ();
/* Returned MEM points at the freshly reserved stack slot; presumably
   released later via ix86_free_from_memory (below) -- confirm callers.  */
16340 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16345 /* Free operand from the memory. */
16347 ix86_free_from_memory (enum machine_mode mode)
/* Nothing to deallocate when the red zone was used for the store.  */
16349 if (!TARGET_RED_ZONE)
/* NOTE(review): size computation branch is elided here; DImode (and any
   mode on 64-bit) evidently takes the larger slot size.  */
16353 if (mode == DImode || TARGET_64BIT)
16357 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16358 to pop or add instruction if registers are available. */
16359 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16360 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16365 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16366 QImode must go into class Q_REGS.
16367 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16368 movdf to do mem-to-mem moves through integer regs. */
16370 ix86_preferred_reload_class (rtx x, enum reg_class class)
16372 enum machine_mode mode = GET_MODE (x);
16374 /* We're only allowed to return a subclass of CLASS. Many of the
16375 following checks fail for NO_REGS, so eliminate that early. */
16376 if (class == NO_REGS)
16379 /* All classes can load zeros. */
16380 if (x == CONST0_RTX (mode))
16383 /* Force constants into memory if we are loading a (nonzero) constant into
16384 an MMX or SSE register. This is because there are no MMX/SSE instructions
16385 to load from a constant. */
16387 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16390 /* Prefer SSE regs only, if we can use them for math. */
16391 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16392 return SSE_CLASS_P (class) ? class : NO_REGS;
16394 /* Floating-point constants need more complex checks. */
16395 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16397 /* General regs can load everything. */
16398 if (reg_class_subset_p (class, GENERAL_REGS))
16401 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16402 zero above. We only want to wind up preferring 80387 registers if
16403 we plan on doing computation with them. */
16405 && standard_80387_constant_p (x))
16407 /* Limit class to non-sse. */
/* Narrow each mixed FP/SSE class to its pure-x87 counterpart.  */
16408 if (class == FLOAT_SSE_REGS)
16410 if (class == FP_TOP_SSE_REGS)
16412 if (class == FP_SECOND_SSE_REGS)
16413 return FP_SECOND_REG;
16414 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16421 /* Generally when we see PLUS here, it's the function invariant
16422 (plus soft-fp const_int). Which can only be computed into general
16424 if (GET_CODE (x) == PLUS)
16425 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16427 /* QImode constants are easy to load, but non-constant QImode data
16428 must go into Q_REGS. */
16429 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16431 if (reg_class_subset_p (class, Q_REGS))
16433 if (reg_class_subset_p (Q_REGS, class))
16441 /* Discourage putting floating-point values in SSE registers unless
16442 SSE math is being used, and likewise for the 387 registers. */
16444 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16446 enum machine_mode mode = GET_MODE (x);
16448 /* Restrict the output reload class to the register bank that we are doing
16449 math on. If we would like not to return a subset of CLASS, reject this
16450 alternative: if reload cannot do this, it will still use its choice. */
16451 mode = GET_MODE (x);
16452 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16453 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16455 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
/* Strip the SSE half of the mixed classes, keeping the x87 part.  */
16457 if (class == FP_TOP_SSE_REGS)
16459 else if (class == FP_SECOND_SSE_REGS)
16460 return FP_SECOND_REG;
16462 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16468 /* If we are copying between general and FP registers, we need a memory
16469 location. The same is true for SSE and MMX registers.
16471 The macro can't work reliably when one of the CLASSES is class containing
16472 registers from multiple units (SSE, MMX, integer). We avoid this by never
16473 combining those units in single alternative in the machine description.
16474 Ensure that this constraint holds to avoid unexpected surprises.
16476 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16477 enforce these sanity checks. */
16480 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16481 enum machine_mode mode, int strict)
/* A class that MAY contain a unit's registers but is not PURELY that unit
   violates the single-unit invariant described above.  */
16483 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16484 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16485 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16486 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16487 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16488 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16490 gcc_assert (!strict);
16494 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16497 /* ??? This is a lie. We do have moves between mmx/general, and for
16498 mmx/sse2. But by saying we need secondary memory we discourage the
16499 register allocator from using the mmx registers unless needed. */
16500 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16503 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16505 /* SSE1 doesn't have any direct moves from other classes. */
16509 /* If the target says that inter-unit moves are more expensive
16510 than moving through memory, then don't generate them. */
16511 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16514 /* Between SSE and general, we have moves no larger than word size. */
16515 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16518 /* ??? For the cost of one register reformat penalty, we could use
16519 the same instructions to move SFmode and DFmode data, but the
16520 relevant move patterns don't support those alternatives. */
16521 if (mode == SFmode || mode == DFmode)
16528 /* Return true if the registers in CLASS cannot represent the change from
16529 modes FROM to TO. */
16532 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16533 enum reg_class class)
16538 /* x87 registers can't do subreg at all, as all values are reformatted
16539 to extended precision. */
16540 if (MAYBE_FLOAT_CLASS_P (class))
16543 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16545 /* Vector registers do not support QI or HImode loads. If we don't
16546 disallow a change to these modes, reload will assume it's ok to
16547 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16548 the vec_dupv4hi pattern. */
16549 if (GET_MODE_SIZE (from) < 4)
16552 /* Vector registers do not support subreg with nonzero offsets, which
16553 are otherwise valid for integer registers. Since we can't see
16554 whether we have a nonzero offset from here, prohibit all
16555 nonparadoxical subregs changing size. */
16556 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16563 /* Return the cost of moving data from a register in class CLASS1 to
16564 one in class CLASS2.
16566 It is not required that the cost always equal 2 when FROM is the same as TO;
16567 on some machines it is expensive to move between registers if they are not
16568 general registers. */
16571 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16572 enum reg_class class2)
16574 /* In case we require secondary memory, compute cost of the store followed
16575 by load. In order to avoid bad register allocation choices, we need
16576 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* STRICT is 0: this path is reachable from cost queries on mixed classes.  */
16578 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16582 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16583 MEMORY_MOVE_COST (mode, class1, 1));
16584 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16585 MEMORY_MOVE_COST (mode, class2, 1));
16587 /* In case of copying from general_purpose_register we may emit multiple
16588 stores followed by single load causing memory size mismatch stall.
16589 Count this as arbitrarily high cost of 20. */
16590 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16593 /* In the case of FP/MMX moves, the registers actually overlap, and we
16594 have to switch modes in order to treat them differently. */
16595 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16596 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16602 /* Moves between SSE/MMX and integer unit are expensive. */
16603 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16604 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16605 return ix86_cost->mmxsse_to_integer;
16606 if (MAYBE_FLOAT_CLASS_P (class1))
16607 return ix86_cost->fp_move;
16608 if (MAYBE_SSE_CLASS_P (class1))
16609 return ix86_cost->sse_move;
16610 if (MAYBE_MMX_CLASS_P (class1))
16611 return ix86_cost->mmx_move;
16615 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16618 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16620 /* Flags and only flags can only hold CCmode values. */
16621 if (CC_REGNO_P (regno))
16622 return GET_MODE_CLASS (mode) == MODE_CC;
16623 if (GET_MODE_CLASS (mode) == MODE_CC
16624 || GET_MODE_CLASS (mode) == MODE_RANDOM
16625 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16627 if (FP_REGNO_P (regno))
16628 return VALID_FP_MODE_P (mode);
16629 if (SSE_REGNO_P (regno))
16631 /* We implement the move patterns for all vector modes into and
16632 out of SSE registers, even when no operation instructions
16634 return (VALID_SSE_REG_MODE (mode)
16635 || VALID_SSE2_REG_MODE (mode)
16636 || VALID_MMX_REG_MODE (mode)
16637 || VALID_MMX_REG_MODE_3DNOW (mode));
16639 if (MMX_REGNO_P (regno))
16641 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16642 so if the register is available at all, then we can move data of
16643 the given mode into or out of it. */
16644 return (VALID_MMX_REG_MODE (mode)
16645 || VALID_MMX_REG_MODE_3DNOW (mode));
/* Beyond this point REGNO is a general-purpose register.  */
16648 if (mode == QImode)
16650 /* Take care for QImode values - they can be in non-QI regs,
16651 but then they do cause partial register stalls. */
/* Regs 0-3 (a/b/c/d) have QI parts on ia32; any GPR does on 64-bit.  */
16652 if (regno < 4 || TARGET_64BIT)
16654 if (!TARGET_PARTIAL_REG_STALL)
16656 return reload_in_progress || reload_completed;
16658 /* We handle both integer and floats in the general purpose registers. */
16659 else if (VALID_INT_MODE_P (mode))
16661 else if (VALID_FP_MODE_P (mode))
16663 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16664 on to use that value in smaller contexts, this can easily force a
16665 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16666 supporting DImode, allow it. */
16667 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16673 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16674 tieable integer mode. */
16677 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* NOTE(review): the mode dispatch (likely a switch) is elided here; only
   two of its result expressions are visible.  */
16686 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16689 return TARGET_64BIT;
16696 /* Return true if MODE1 is accessible in a register that can hold MODE2
16697 without copying. That is, all register classes that can hold MODE2
16698 can also hold MODE1. */
16701 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16703 if (mode1 == mode2)
16706 if (ix86_tieable_integer_mode_p (mode1)
16707 && ix86_tieable_integer_mode_p (mode2))
16710 /* MODE2 being XFmode implies fp stack or general regs, which means we
16711 can tie any smaller floating point modes to it. Note that we do not
16712 tie this with TFmode. */
16713 if (mode2 == XFmode)
16714 return mode1 == SFmode || mode1 == DFmode;
16716 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16717 that we can tie it with SFmode. */
16718 if (mode2 == DFmode)
16719 return mode1 == SFmode;
16721 /* If MODE2 is only appropriate for an SSE register, then tie with
16722 any other mode acceptable to SSE registers. */
16723 if (GET_MODE_SIZE (mode2) >= 8
16724 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16725 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16727 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16728 with any other mode acceptable to MMX registers. */
16729 if (GET_MODE_SIZE (mode2) == 8
16730 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16731 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16736 /* Return the cost of moving data of mode M between a
16737 register and memory. A value of 2 is the default; this cost is
16738 relative to those in `REGISTER_MOVE_COST'.
16740 If moving between registers and memory is more expensive than
16741 between two registers, you should define this macro to express the
16744 Model also increased moving costs of QImode registers in non
16748 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
/* IN nonzero means a load (memory -> register), zero means a store.  */
16750 if (FLOAT_CLASS_P (class))
16767 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16769 if (SSE_CLASS_P (class))
/* NOTE(review): the size -> index mapping of each switch is elided.  */
16772 switch (GET_MODE_SIZE (mode))
16786 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16788 if (MMX_CLASS_P (class))
16791 switch (GET_MODE_SIZE (mode))
16802 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* General-purpose registers from here on.  */
16804 switch (GET_MODE_SIZE (mode))
16808 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16809 : ix86_cost->movzbl_load);
16811 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16812 : ix86_cost->int_store[0] + 4);
16815 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16817 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
16818 if (mode == TFmode)
16820 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16821 * (((int) GET_MODE_SIZE (mode)
16822 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
16826 /* Compute a (partial) cost for rtx X. Return true if the complete
16827 cost has been computed, and false if subexpressions should be
16828 scanned. In either case, *TOTAL contains the cost result. */
16831 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16833 enum machine_mode mode = GET_MODE (x);
16841 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16843 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16845 else if (flag_pic && SYMBOLIC_CONST (x)
/* Fixed: was "!GET_CODE (x) != LABEL_REF".  !GET_CODE (x) yields 0 or 1,
   so comparing it against the enum value LABEL_REF made this sub-test
   effectively always true; the intent is plainly "X is not a label and
   not a local symbol", matching the SYMBOL_REF test below.  */
16847 || (GET_CODE (x) != LABEL_REF
16848 && (GET_CODE (x) != SYMBOL_REF
16849 || !SYMBOL_REF_LOCAL_P (x)))))
16856 if (mode == VOIDmode)
16859 switch (standard_80387_constant_p (x))
16864 default: /* Other constants */
16869 /* Start with (MEM (SYMBOL_REF)), since that's where
16870 it'll probably end up. Add a penalty for size. */
16871 *total = (COSTS_N_INSNS (1)
16872 + (flag_pic != 0 && !TARGET_64BIT)
16873 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16879 /* The zero extensions is often completely free on x86_64, so make
16880 it as cheap as possible. */
16881 if (TARGET_64BIT && mode == DImode
16882 && GET_MODE (XEXP (x, 0)) == SImode
16884 else if (TARGET_ZERO_EXTEND_WITH_AND)
16885 *total = ix86_cost->add;
16887 *total = ix86_cost->movzx;
16891 *total = ix86_cost->movsx;
16895 if (GET_CODE (XEXP (x, 1)) == CONST_INT
16896 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
16898 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16901 *total = ix86_cost->add;
16904 if ((value == 2 || value == 3)
16905 && ix86_cost->lea <= ix86_cost->shift_const)
16907 *total = ix86_cost->lea;
/* 32-bit double-word shifts are synthesized from two single shifts.  */
16917 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
16919 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16921 if (INTVAL (XEXP (x, 1)) > 32)
16922 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
16924 *total = ix86_cost->shift_const * 2;
16928 if (GET_CODE (XEXP (x, 1)) == AND)
16929 *total = ix86_cost->shift_var * 2;
16931 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
16936 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16937 *total = ix86_cost->shift_const;
16939 *total = ix86_cost->shift_var;
16944 if (FLOAT_MODE_P (mode))
16946 *total = ix86_cost->fmul;
16951 rtx op0 = XEXP (x, 0);
16952 rtx op1 = XEXP (x, 1);
16954 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16956 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
/* Count the set bits of the constant multiplier (Kernighan's trick).  */
16957 for (nbits = 0; value != 0; value &= value - 1)
16961 /* This is arbitrary. */
16964 /* Compute costs correctly for widening multiplication. */
/* Fixed: the second disjunct tested OP1 but the very next line takes
   XEXP (op0, 0), which is only valid when OP0 itself is an extension;
   both disjuncts must therefore test OP0 (the op0/op1 symmetry is
   handled below by the GET_CODE (op0) == GET_CODE (op1) check).  */
16965 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
16966 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
16967 == GET_MODE_SIZE (mode))
16969 int is_mulwiden = 0;
16970 enum machine_mode inner_mode = GET_MODE (op0);
16972 if (GET_CODE (op0) == GET_CODE (op1))
16973 is_mulwiden = 1, op1 = XEXP (op1, 0);
16974 else if (GET_CODE (op1) == CONST_INT)
16976 if (GET_CODE (op0) == SIGN_EXTEND)
16977 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
16980 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
16984 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
16987 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
16988 + nbits * ix86_cost->mult_bit
16989 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
16998 if (FLOAT_MODE_P (mode))
16999 *total = ix86_cost->fdiv;
17001 *total = ix86_cost->divide[MODE_INDEX (mode)];
17005 if (FLOAT_MODE_P (mode))
17006 *total = ix86_cost->fadd;
17007 else if (GET_MODE_CLASS (mode) == MODE_INT
17008 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
/* (plus (plus (mult R C) R2) C2) with C in {2,4,8} matches a single LEA.  */
17010 if (GET_CODE (XEXP (x, 0)) == PLUS
17011 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17012 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17013 && CONSTANT_P (XEXP (x, 1)))
17015 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17016 if (val == 2 || val == 4 || val == 8)
17018 *total = ix86_cost->lea;
17019 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17020 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17022 *total += rtx_cost (XEXP (x, 1), outer_code);
17026 else if (GET_CODE (XEXP (x, 0)) == MULT
17027 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17029 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17030 if (val == 2 || val == 4 || val == 8)
17032 *total = ix86_cost->lea;
17033 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17034 *total += rtx_cost (XEXP (x, 1), outer_code);
17038 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17040 *total = ix86_cost->lea;
17041 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17042 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17043 *total += rtx_cost (XEXP (x, 1), outer_code);
17050 if (FLOAT_MODE_P (mode))
17052 *total = ix86_cost->fadd;
/* 32-bit DImode add/sub: two word operations plus widening of any
   narrower operand (the shift doubles the operand's cost).  */
17060 if (!TARGET_64BIT && mode == DImode)
17062 *total = (ix86_cost->add * 2
17063 + (rtx_cost (XEXP (x, 0), outer_code)
17064 << (GET_MODE (XEXP (x, 0)) != DImode))
17065 + (rtx_cost (XEXP (x, 1), outer_code)
17066 << (GET_MODE (XEXP (x, 1)) != DImode)));
17072 if (FLOAT_MODE_P (mode))
17074 *total = ix86_cost->fchs;
17080 if (!TARGET_64BIT && mode == DImode)
17081 *total = ix86_cost->add * 2;
17083 *total = ix86_cost->add;
17087 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17088 && XEXP (XEXP (x, 0), 1) == const1_rtx
17089 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17090 && XEXP (x, 1) == const0_rtx)
17092 /* This kind of construct is implemented using test[bwl].
17093 Treat it as if we had an AND. */
17094 *total = (ix86_cost->add
17095 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17096 + rtx_cost (const1_rtx, outer_code));
17102 if (!TARGET_SSE_MATH
17104 || (mode == DFmode && !TARGET_SSE2))
17109 if (FLOAT_MODE_P (mode))
17110 *total = ix86_cost->fabs;
17114 if (FLOAT_MODE_P (mode))
17115 *total = ix86_cost->fsqrt;
17119 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels for Mach-O stubs.  */
17130 static int current_machopic_label_num;
17132 /* Given a symbol name and its associated stub, write out the
17133 definition of the stub. */
17136 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17138 unsigned int length;
17139 char *binder_name, *symbol_name, lazy_ptr_name[32];
17140 int label = ++current_machopic_label_num;
17142 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17143 symb = (*targetm.strip_name_encoding) (symb);
17145 length = strlen (stub);
17146 binder_name = alloca (length + 32);
17147 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17149 length = strlen (symb);
17150 symbol_name = alloca (length + 32);
17151 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17153 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the PIC or non-PIC stub section, then emit the stub body.  */
17156 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17158 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17160 fprintf (file, "%s:\n", stub);
17161 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC form: materialize the PC in %eax, then jump through the lazy ptr.  */
17165 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17166 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17167 fprintf (file, "\tjmp\t*%%edx\n");
17170 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder entry: push the lazy pointer's address and enter dyld.  */
17172 fprintf (file, "%s:\n", binder_name);
17176 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17177 fprintf (file, "\tpushl\t%%eax\n");
17180 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17182 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer slot, initially pointing at the binder.  */
17184 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17185 fprintf (file, "%s:\n", lazy_ptr_name);
17186 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17187 fprintf (file, "\t.long %s\n", binder_name);
/* Target hook: finish assembly output for Darwin/x86 by delegating to the
   generic Darwin end-of-file handling.  */
17191 darwin_x86_file_end (void)
17193 darwin_file_end ();
17196 #endif /* TARGET_MACHO */
17198 /* Order the registers for register allocator. */
17201 x86_order_regs_for_local_alloc (void)
17206 /* First allocate the local general purpose registers. */
/* "Local" here means call-clobbered: preferred since they need no
   save/restore in the prologue/epilogue.  */
17207 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17208 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17209 reg_alloc_order [pos++] = i;
17211 /* Global general purpose registers. */
17212 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17213 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17214 reg_alloc_order [pos++] = i;
17216 /* x87 registers come first in case we are doing FP math
17218 if (!TARGET_SSE_MATH)
17219 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17220 reg_alloc_order [pos++] = i;
17222 /* SSE registers. */
17223 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17224 reg_alloc_order [pos++] = i;
17225 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17226 reg_alloc_order [pos++] = i;
17228 /* x87 registers. */
/* When SSE does the FP math, x87 regs are deprioritized after SSE.  */
17229 if (TARGET_SSE_MATH)
17230 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17231 reg_alloc_order [pos++] = i;
17233 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17234 reg_alloc_order [pos++] = i;
17236 /* Initialize the rest of array as we do not allocate some registers
17238 while (pos < FIRST_PSEUDO_REGISTER)
17239 reg_alloc_order [pos++] = 0;
17242 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17243 struct attribute_spec.handler. */
17245 ix86_handle_struct_attribute (tree *node, tree name,
17246 tree args ATTRIBUTE_UNUSED,
17247 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Resolve NODE to the type the attribute will actually be attached to.  */
17250 if (DECL_P (*node))
17252 if (TREE_CODE (*node) == TYPE_DECL)
17253 type = &TREE_TYPE (*node);
/* Only structs and unions may carry layout attributes.  */
17258 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17259 || TREE_CODE (*type) == UNION_TYPE)))
17261 warning (OPT_Wattributes, "%qs attribute ignored",
17262 IDENTIFIER_POINTER (name));
17263 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on one type.  */
17266 else if ((is_attribute_p ("ms_struct", name)
17267 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17268 || ((is_attribute_p ("gcc_struct", name)
17269 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17271 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17272 IDENTIFIER_POINTER (name));
17273 *no_add_attrs = true;
/* Target hook: nonzero when RECORD_TYPE should use the MS bit-field layout
   rules -- either the target default is on and the type does not opt out
   with "gcc_struct", or the type opts in with "ms_struct".  */
17280 ix86_ms_bitfield_layout_p (tree record_type)
17282 return (TARGET_MS_BITFIELD_LAYOUT &&
17283 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17284 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17287 /* Returns an expression indicating where the this parameter is
17288 located on entry to the FUNCTION. */
17291 x86_this_parameter (tree function)
17293 tree type = TREE_TYPE (function);
/* 64-bit: `this' is in the first integer argument register, shifted by
   one slot when a hidden aggregate-return pointer occupies slot 0.  */
17297 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17298 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
/* 32-bit with regparm: `this' arrives in a register.  */
17301 if (ix86_function_regparm (type, function) > 0)
17305 parm = TYPE_ARG_TYPES (type);
17306 /* Figure out whether or not the function has a variable number of
17308 for (; parm; parm = TREE_CHAIN (parm))
17309 if (TREE_VALUE (parm) == void_type_node)
17311 /* If not, the this parameter is in the first argument. */
17315 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17317 return gen_rtx_REG (SImode, regno);
/* Otherwise `this' is on the stack, past the return address (and past a
   hidden aggregate-return pointer when present).  */
17321 if (aggregate_value_p (TREE_TYPE (type), type))
17322 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17324 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17327 /* Determine whether x86_output_mi_thunk can succeed. */
/* Target hook: returns whether the thunk emitter below can handle this
   (DELTA, VCALL_OFFSET, FUNCTION) combination.  On 32-bit the limiting
   resource is a free scratch register.  */
17330 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17331 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17332 HOST_WIDE_INT vcall_offset, tree function)
17334 /* 64-bit can handle anything. */
17338 /* For 32-bit, everything's fine if we have one free register. */
17339 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17342 /* Need a free register for vcall_offset. */
17346 /* Need a free register for GOT references. */
17347 if (flag_pic && !(*targetm.binds_local_p) (function))
17350 /* Otherwise ok. */
17354 /* Output the assembler code for a thunk function. THUNK_DECL is the
17355 declaration for the thunk function itself, FUNCTION is the decl for
17356 the target function. DELTA is an immediate constant offset to be
17357 added to THIS. If VCALL_OFFSET is nonzero, the word at
17358 *(*this + vcall_offset) should be added to THIS. */
/* Emits textual assembly directly via output_asm_insn: adjust 'this' by
   DELTA, optionally by the vtable entry at VCALL_OFFSET, then tail-jump
   to FUNCTION (direct, via GOT, or via Darwin stub).  NOTE(review): many
   TARGET_64BIT/TARGET_MACHO conditional lines are missing from this
   extract; read against the full file before modifying.  */
17361 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17362 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17363 HOST_WIDE_INT vcall_offset, tree function)
17366 rtx this = x86_this_parameter (function);
17369 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17370 pull it in now and let DELTA benefit. */
17373 else if (vcall_offset)
17375 /* Put the this parameter into %eax. */
17377 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17378 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17381 this_reg = NULL_RTX;
17383 /* Adjust the this parameter by a fixed constant. */
17386 xops[0] = GEN_INT (delta);
17387 xops[1] = this_reg ? this_reg : this;
/* A 64-bit DELTA that is not a valid immediate must be loaded into the
   R10 scratch register first.  */
17390 if (!x86_64_general_operand (xops[0], DImode))
17392 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17394 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17398 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17401 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17404 /* Adjust the this parameter by a value stored in the vtable. */
/* Scratch register choice: R10 on 64-bit; ECX on 32-bit, except EAX
   for fastcall (ECX carries an argument there).  */
17408 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17411 int tmp_regno = 2 /* ECX */;
17412 if (lookup_attribute ("fastcall",
17413 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17414 tmp_regno = 0 /* EAX */;
17415 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into the scratch register.  */
17418 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17421 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17423 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17425 /* Adjust the this parameter. */
17426 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* Out-of-range 64-bit VCALL_OFFSET: materialize it in R11 and use a
   register+register address instead.  */
17427 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17429 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17430 xops[0] = GEN_INT (vcall_offset);
17432 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17433 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17435 xops[1] = this_reg;
17437 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17439 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17442 /* If necessary, drop THIS back to its stack slot. */
17443 if (this_reg && this_reg != this)
17445 xops[0] = this_reg;
17447 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, tail-call FUNCTION: directly when it binds locally, otherwise
   through the GOT (64-bit), a Darwin stub, or a %ebx-relative GOT load.  */
17450 xops[0] = XEXP (DECL_RTL (function), 0);
17453 if (!flag_pic || (*targetm.binds_local_p) (function))
17454 output_asm_insn ("jmp\t%P0", xops);
17457 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17458 tmp = gen_rtx_CONST (Pmode, tmp);
17459 tmp = gen_rtx_MEM (QImode, tmp);
17461 output_asm_insn ("jmp\t%A0", xops);
17466 if (!flag_pic || (*targetm.binds_local_p) (function))
17467 output_asm_insn ("jmp\t%P0", xops);
17472 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17473 tmp = (gen_rtx_SYMBOL_REF
17475 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17476 tmp = gen_rtx_MEM (QImode, tmp);
17478 output_asm_insn ("jmp\t%0", xops);
17481 #endif /* TARGET_MACHO */
/* 32-bit PIC non-local: set up the GOT pointer in ECX, then jump
   through the function's GOT slot.  */
17483 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17484 output_set_got (tmp, NULL_RTX);
17487 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17488 output_asm_insn ("jmp\t{*}%1", xops);
/* Target hook for TARGET_ASM_FILE_START: emit standard prologue plus
   optional .version, __fltused, and .intel_syntax directives depending
   on target configuration.  */
17494 x86_file_start (void)
17496 default_file_start ();
17498 darwin_file_start ();
17500 if (X86_FILE_START_VERSION_DIRECTIVE)
17501 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17502 if (X86_FILE_START_FLTUSED)
17503 fputs ("\t.global\t__fltused\n", asm_out_file);
17504 if (ix86_asm_dialect == ASM_INTEL)
17505 fputs ("\t.intel_syntax\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: compute the alignment for FIELD given the
   alignment COMPUTED so far.  On 32-bit without -malign-double, double,
   double-complex and integer fields are capped at 32-bit alignment.
   NOTE(review): the early-return for TARGET_64BIT/TARGET_ALIGN_DOUBLE
   and the final return are missing from this extract.  */
17509 x86_field_alignment (tree field, int computed)
17511 enum machine_mode mode;
17512 tree type = TREE_TYPE (field);
17514 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, alignment is governed by the innermost element type.  */
17516 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17517 ? get_inner_array_type (type) : type);
17518 if (mode == DFmode || mode == DCmode
17519 || GET_MODE_CLASS (mode) == MODE_INT
17520 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17521 return MIN (32, computed);
17525 /* Output assembler code to FILE to increment profiler label # LABELNO
17526 for profiling a function entry. */
/* Emits the mcount call sequence.  Four variants (the #if/#else
   structure is partly missing from this extract): 64-bit PIC,
   64-bit non-PIC, 32-bit PIC (GOT-relative), 32-bit non-PIC; each
   optionally loads a per-call counter label unless NO_PROFILE_COUNTERS.  */
17528 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17533 #ifndef NO_PROFILE_COUNTERS
17534 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17536 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17540 #ifndef NO_PROFILE_COUNTERS
17541 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17543 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17547 #ifndef NO_PROFILE_COUNTERS
17548 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17549 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17551 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17555 #ifndef NO_PROFILE_COUNTERS
17556 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17557 PROFILE_COUNT_REGISTER);
17559 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17563 /* We don't have exact information about the insn sizes, but we may assume
17564 quite safely that we are informed about all 1 byte insns and memory
17565 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound (in bytes) on INSN's encoded size,
   used by ix86_avoid_jump_misspredicts below.  NOTE(review): several
   return statements are missing from this extract.  */
17569 min_insn_size (rtx insn)
/* Non-insns and inactive insns occupy no bytes.  */
17573 if (!INSN_P (insn) || !active_insn_p (insn))
17576 /* Discard alignments we've emit and jump instructions. */
17577 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17578 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17580 if (GET_CODE (insn) == JUMP_INSN
17581 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17582 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17585 /* Important case - calls are always 5 bytes.
17586 It is common to have many calls in the row. */
17587 if (GET_CODE (insn) == CALL_INSN
17588 && symbolic_reference_mentioned_p (PATTERN (insn))
17589 && !SIBLING_CALL_P (insn))
17591 if (get_attr_length (insn) <= 1)
17594 /* For normal instructions we may rely on the sizes of addresses
17595 and the presence of symbol to require 4 bytes of encoding.
17596 This is not the case for jumps where references are PC relative. */
17597 if (GET_CODE (insn) != JUMP_INSN)
17599 l = get_attr_length_address (insn);
17600 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17609 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Machine-reorg subpass: slide a window [START, INSN] over the insn
   stream; whenever a window smaller than 16 bytes would contain a 4th
   jump/call, emit an alignment insn (gen_align) before INSN to push it
   into the next 16-byte page.  */
17613 ix86_avoid_jump_misspredicts (void)
17615 rtx insn, start = get_insns ();
17616 int nbytes = 0, njumps = 0;
17619 /* Look for all minimal intervals of instructions containing 4 jumps.
17620 The intervals are bounded by START and INSN. NBYTES is the total
17621 size of instructions in the interval including INSN and not including
17622 START. When the NBYTES is smaller than 16 bytes, it is possible
17623 that the end of START and INSN ends up in the same 16byte page.
17625 The smallest offset in the page INSN can start is the case where START
17626 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
17627 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
17629 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17632 nbytes += min_insn_size (insn);
17634 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17635 INSN_UID (insn), min_insn_size (insn));
/* Count INSN if it is a (non-tablejump) jump or a call.  */
17636 if ((GET_CODE (insn) == JUMP_INSN
17637 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17638 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17639 || GET_CODE (insn) == CALL_INSN
/* Shrink the window from the front while it holds too many jumps,
   un-counting jumps/calls that fall out of it.  */
17646 start = NEXT_INSN (start);
17647 if ((GET_CODE (start) == JUMP_INSN
17648 && GET_CODE (PATTERN (start)) != ADDR_VEC
17649 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17650 || GET_CODE (start) == CALL_INSN)
17651 njumps--, isjump = 1;
17654 nbytes -= min_insn_size (start);
17656 gcc_assert (njumps >= 0);
17658 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17659 INSN_UID (start), INSN_UID (insn), nbytes);
/* Four jumps would share a 16-byte page: pad before INSN.  */
17661 if (njumps == 3 && isjump && nbytes < 16)
17663 int padsize = 15 - nbytes + min_insn_size (insn);
17666 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17667 INSN_UID (insn), padsize);
17668 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17673 /* AMD Athlon works faster
17674 when RET is not destination of conditional jump or directly preceded
17675 by other jump instruction. We avoid the penalty by inserting NOP just
17676 before the RET instructions in such cases. */
/* Walks every predecessor edge of the exit block; when the block ends
   in a RETURN that is a jump target or immediately follows a jump/call,
   replaces it with the "rep ret" form (gen_return_internal_long).
   NOTE(review): the `replace = true` assignments and the replacement
   emission lines are missing from this extract.  */
17678 ix86_pad_returns (void)
17683 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17685 basic_block bb = e->src;
17686 rtx ret = BB_END (bb);
17688 bool replace = false;
/* Only hot blocks ending in a plain RETURN are interesting.  */
17690 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17691 || !maybe_hot_bb_p (bb))
17693 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17694 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
/* RET preceded by a label: check whether any incoming edge is a
   taken branch (non-fallthru), which would mispredict.  */
17696 if (prev && GET_CODE (prev) == CODE_LABEL)
17701 FOR_EACH_EDGE (e, ei, bb->preds)
17702 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17703 && !(e->flags & EDGE_FALLTHRU))
17708 prev = prev_active_insn (ret);
17710 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17711 || GET_CODE (prev) == CALL_INSN))
17713 /* Empty functions get branch mispredict even when the jump destination
17714 is not visible to us. */
17715 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17720 emit_insn_before (gen_return_internal_long (), ret);
17726 /* Implement machine specific optimizations. We implement padding of returns
17727 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* TARGET_MACHINE_DEPENDENT_REORG hook; both subpasses are skipped when
   not optimizing or when optimizing for size.  */
17731 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
17732 ix86_pad_returns ();
17733 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17734 ix86_avoid_jump_misspredicts ();
17737 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans INSN's extracted operands for a hard QImode register with
   regno >= 4 (i.e. not AL/BL/CL/DL), which needs a REX prefix in
   64-bit mode.  NOTE(review): the mode check on the operand and the
   return statements are missing from this extract.  */
17740 x86_extended_QIreg_mentioned_p (rtx insn)
17743 extract_insn_cached (insn);
17744 for (i = 0; i < recog_data.n_operands; i++)
17745 if (REG_P (recog_data.operand[i])
17746 && REGNO (recog_data.operand[i]) >= 4)
17751 /* Return nonzero when P points to register encoded via REX prefix.
17752 Called via for_each_rtx. */
/* for_each_rtx callback: nonzero when *P is a REX-encoded integer or
   SSE register (R8-R15 / XMM8-XMM15).  */
17754 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17756 unsigned int regno;
17759 regno = REGNO (*p);
17760 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17763 /* Return true when INSN mentions register that must be encoded using REX
/* Walks INSN's pattern with the callback above.  */
17766 x86_extended_reg_mentioned_p (rtx insn)
17768 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17771 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17772 optabs would emit if we didn't have TFmode patterns. */
/* Strategy: if the input is non-negative a plain signed FLOAT works;
   otherwise halve it with (in >> 1) | (in & 1) (rounding-safe), convert,
   and double the result with f0 + f0.  */
17775 x86_emit_floatuns (rtx operands[2])
17777 rtx neglab, donelab, i0, i1, f0, in, out;
17778 enum machine_mode mode, inmode;
17780 inmode = GET_MODE (operands[1]);
17781 gcc_assert (inmode == SImode || inmode == DImode);
17784 in = force_reg (inmode, operands[1]);
17785 mode = GET_MODE (out);
17786 neglab = gen_label_rtx ();
17787 donelab = gen_label_rtx ();
17788 i1 = gen_reg_rtx (Pmode);
17789 f0 = gen_reg_rtx (mode);
/* Negative (as signed) means the high bit is set: take the slow path.  */
17791 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17793 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17794 emit_jump_insn (gen_jump (donelab));
17797 emit_label (neglab);
/* i0 = (in >> 1) | (in & 1); keeping the low bit ORed in preserves
   correct rounding after the final doubling.  */
17799 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17800 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17801 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17802 expand_float (f0, i0, 0);
17803 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17805 emit_label (donelab);
17808 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17809 with all elements equal to VAR. Return true if successful. */
/* Dispatches on MODE (the switch/case labels are missing from this
   extract): direct VEC_DUPLICATE where a pattern exists, pshuflw-style
   truncate+duplicate for V4HI, explicit punpckl+pshufd sequences for
   V8HI/V16QI on SSE2, and a widen-and-recurse fallback.  */
17812 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17813 rtx target, rtx val)
17815 enum machine_mode smode, wsmode, wvmode;
17822 if (!mmx_ok && !TARGET_SSE)
/* Simple case: the machine has a native vec_duplicate pattern.  */
17830 val = force_reg (GET_MODE_INNER (mode), val);
17831 x = gen_rtx_VEC_DUPLICATE (mode, val);
17832 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17838 if (TARGET_SSE || TARGET_3DNOW_A)
17840 val = gen_lowpart (SImode, val);
17841 x = gen_rtx_TRUNCATE (HImode, val);
17842 x = gen_rtx_VEC_DUPLICATE (mode, x);
17843 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17865 /* Extend HImode to SImode using a paradoxical SUBREG. */
17866 tmp1 = gen_reg_rtx (SImode);
17867 emit_move_insn (tmp1, gen_lowpart (SImode, val));
17868 /* Insert the SImode value as low element of V4SImode vector. */
17869 tmp2 = gen_reg_rtx (V4SImode);
17870 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
17871 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
17872 CONST0_RTX (V4SImode),
17874 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
17875 /* Cast the V4SImode vector back to a V8HImode vector. */
17876 tmp1 = gen_reg_rtx (V8HImode);
17877 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
17878 /* Duplicate the low short through the whole low SImode word. */
17879 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
17880 /* Cast the V8HImode vector back to a V4SImode vector. */
17881 tmp2 = gen_reg_rtx (V4SImode);
17882 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
17883 /* Replicate the low element of the V4SImode vector. */
17884 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
17885 /* Cast the V2SImode back to V8HImode, and store in target. */
17886 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
17897 /* Extend QImode to SImode using a paradoxical SUBREG. */
17898 tmp1 = gen_reg_rtx (SImode);
17899 emit_move_insn (tmp1, gen_lowpart (SImode, val));
17900 /* Insert the SImode value as low element of V4SImode vector. */
17901 tmp2 = gen_reg_rtx (V4SImode);
17902 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
17903 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
17904 CONST0_RTX (V4SImode),
17906 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
17907 /* Cast the V4SImode vector back to a V16QImode vector. */
17908 tmp1 = gen_reg_rtx (V16QImode);
17909 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
17910 /* Duplicate the low byte through the whole low SImode word. */
17911 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
17912 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
17913 /* Cast the V16QImode vector back to a V4SImode vector. */
17914 tmp2 = gen_reg_rtx (V4SImode);
17915 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
17916 /* Replicate the low element of the V4SImode vector. */
17917 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
17918 /* Cast the V2SImode back to V16QImode, and store in target. */
17919 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
17927 /* Replicate the value once into the next wider mode and recurse. */
17928 val = convert_modes (wsmode, smode, val, true);
/* Pack two copies of VAL side by side in the wider scalar, then
   broadcast that in the wider vector mode and view as MODE.  */
17929 x = expand_simple_binop (wsmode, ASHIFT, val,
17930 GEN_INT (GET_MODE_BITSIZE (smode)),
17931 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17932 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
17934 x = gen_reg_rtx (wvmode);
17935 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
17936 gcc_unreachable ();
17937 emit_move_insn (target, gen_lowpart (mode, x));
17945 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17946 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* Mode switch labels are missing from this extract.  Two-element
   vectors use VEC_CONCAT with zero; four-element vectors build
   {var,0,0,0} via VEC_MERGE then shuffle VAR into position ONE_VAR;
   small integer modes zero-extend to SImode and recurse.  */
17950 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
17951 rtx target, rtx var, int one_var)
17953 enum machine_mode vsimode;
17961 if (!mmx_ok && !TARGET_SSE)
/* Two-element case: concat VAR with a zero element.  */
17969 var = force_reg (GET_MODE_INNER (mode), var);
17970 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
17971 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Four-element case: need a pseudo if TARGET is a hard reg, since we
   shuffle it below.  */
17976 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
17977 new_target = gen_reg_rtx (mode);
17979 new_target = target;
17980 var = force_reg (GET_MODE_INNER (mode), var);
17981 x = gen_rtx_VEC_DUPLICATE (mode, var);
17982 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
17983 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
17986 /* We need to shuffle the value to the correct position, so
17987 create a new pseudo to store the intermediate result. */
17989 /* With SSE2, we can use the integer shuffle insns. */
17990 if (mode != V4SFmode && TARGET_SSE2)
/* Select element 0 for lane ONE_VAR, element 1 (a zero) elsewhere.  */
17992 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
17994 GEN_INT (one_var == 1 ? 0 : 1),
17995 GEN_INT (one_var == 2 ? 0 : 1),
17996 GEN_INT (one_var == 3 ? 0 : 1)));
17997 if (target != new_target)
17998 emit_move_insn (target, new_target);
18002 /* Otherwise convert the intermediate result to V4SFmode and
18003 use the SSE1 shuffle instructions. */
18004 if (mode != V4SFmode)
18006 tmp = gen_reg_rtx (V4SFmode);
18007 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
/* shufps: the +4 selectors pick from the second source operand.  */
18012 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18014 GEN_INT (one_var == 1 ? 0 : 1),
18015 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18016 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18018 if (mode != V4SFmode)
18019 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18020 else if (tmp != target)
18021 emit_move_insn (target, tmp);
18023 else if (target != new_target)
18024 emit_move_insn (target, new_target);
18029 vsimode = V4SImode;
18035 vsimode = V2SImode;
18041 /* Zero extend the variable element to SImode and recurse. */
18042 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18044 x = gen_reg_rtx (vsimode);
18045 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18047 gcc_unreachable ();
18049 emit_move_insn (target, gen_lowpart (mode, x));
18057 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18058 consisting of the values in VALS. It is known that all elements
18059 except ONE_VAR are constants. Return true if successful. */
/* Loads the all-constant version of the vector from the pool (with the
   variable slot zeroed), then overwrites slot ONE_VAR via
   ix86_expand_vector_set.  QImode elements are paired up into an HImode
   set first since no byte-insert exists.  */
18062 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18063 rtx target, rtx vals, int one_var)
18065 rtx var = XVECEXP (vals, 0, one_var);
18066 enum machine_mode wmode;
/* Build the constant pool image with the variable element zeroed.  */
18069 const_vec = copy_rtx (vals);
18070 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18071 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18079 /* For the two element vectors, it's just as easy to use
18080 the general case. */
18096 /* There's no way to set one QImode entry easily. Combine
18097 the variable value with its adjacent constant value, and
18098 promote to an HImode set. */
18099 x = XVECEXP (vals, 0, one_var ^ 1);
/* Even index: VAR occupies the high byte of the HImode pair.  */
18102 var = convert_modes (HImode, QImode, var, true);
18103 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18104 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18105 x = GEN_INT (INTVAL (x) & 0xff);
/* Odd index: the constant neighbor goes in the high byte.  */
18109 var = convert_modes (HImode, QImode, var, true);
18110 x = gen_int_mode (INTVAL (x) << 8, HImode);
18112 if (x != const0_rtx)
18113 var = expand_simple_binop (HImode, IOR, var, x, var,
18114 1, OPTAB_LIB_WIDEN);
18116 x = gen_reg_rtx (wmode);
18117 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18118 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18120 emit_move_insn (target, gen_lowpart (mode, x));
/* General path: load the pool constant and insert VAR.  */
18127 emit_move_insn (target, const_vec);
18128 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18132 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18133 all values variable, and none identical. */
/* Two strategies (switch labels missing from this extract): VEC_CONCAT
   for 2-element vectors and recursively for 4-element ones, or an
   integer shift/IOR build of word-sized chunks assembled through the
   word registers.  */
18136 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18137 rtx target, rtx vals)
18139 enum machine_mode half_mode = GET_MODE_INNER (mode);
18140 rtx op0 = NULL, op1 = NULL;
18141 bool use_vec_concat = false;
18147 if (!mmx_ok && !TARGET_SSE)
18153 /* For the two element vectors, we always implement VEC_CONCAT. */
18154 op0 = XVECEXP (vals, 0, 0);
18155 op1 = XVECEXP (vals, 0, 1);
18156 use_vec_concat = true;
18160 half_mode = V2SFmode;
18163 half_mode = V2SImode;
18169 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18170 Recurse to load the two halves. */
18172 op0 = gen_reg_rtx (half_mode);
18173 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18174 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18176 op1 = gen_reg_rtx (half_mode);
18177 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18178 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18180 use_vec_concat = true;
18191 gcc_unreachable ();
18194 if (use_vec_concat)
18196 if (!register_operand (op0, half_mode))
18197 op0 = force_reg (half_mode, op0);
18198 if (!register_operand (op1, half_mode))
18199 op1 = force_reg (half_mode, op1);
18201 emit_insn (gen_rtx_SET (VOIDmode, target,
18202 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* Fallback: pack elements into word-mode chunks with shift/IOR.  */
18206 int i, j, n_elts, n_words, n_elt_per_word;
18207 enum machine_mode inner_mode;
18208 rtx words[4], shift;
18210 inner_mode = GET_MODE_INNER (mode);
18211 n_elts = GET_MODE_NUNITS (mode);
18212 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18213 n_elt_per_word = n_elts / n_words;
18214 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18216 for (i = 0; i < n_words; ++i)
18218 rtx word = NULL_RTX;
/* Elements are folded in high-to-low order so each shift makes room
   for the next lower element.  */
18220 for (j = 0; j < n_elt_per_word; ++j)
18222 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18223 elt = convert_modes (word_mode, inner_mode, elt, true);
18229 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18230 word, 1, OPTAB_LIB_WIDEN);
18231 word = expand_simple_binop (word_mode, IOR, word, elt,
18232 word, 1, OPTAB_LIB_WIDEN);
18240 emit_move_insn (target, gen_lowpart (mode, words[0]));
18241 else if (n_words == 2)
/* CLOBBER first so the low/high part writes don't read TMP.  */
18243 rtx tmp = gen_reg_rtx (mode);
18244 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18245 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18246 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18247 emit_move_insn (target, tmp);
18249 else if (n_words == 4)
18251 rtx tmp = gen_reg_rtx (V4SImode);
18252 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18253 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18254 emit_move_insn (target, gen_lowpart (mode, tmp));
18257 gcc_unreachable ();
18261 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18262 instructions unless MMX_OK is true. */
/* Top-level dispatcher: classify VALS (all-constant, all-same, exactly
   one variable element, fully general) and route to the specialized
   expanders above, cheapest first.  */
18265 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18267 enum machine_mode mode = GET_MODE (target);
18268 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18269 int n_elts = GET_MODE_NUNITS (mode);
18270 int n_var = 0, one_var = -1;
18271 bool all_same = true, all_const_zero = true;
18275 for (i = 0; i < n_elts; ++i)
18277 x = XVECEXP (vals, 0, i);
18278 if (!CONSTANT_P (x))
18279 n_var++, one_var = i;
18280 else if (x != CONST0_RTX (inner_mode))
18281 all_const_zero = false;
18282 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18286 /* Constants are best loaded from the constant pool. */
18289 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18293 /* If all values are identical, broadcast the value. */
18295 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18296 XVECEXP (vals, 0, 0)))
18299 /* Values where only one field is non-constant are best loaded from
18300 the pool and overwritten via move later. */
18304 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18305 XVECEXP (vals, 0, one_var),
18309 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18313 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Set element ELT of vector TARGET to scalar VAL, in place.  Mode switch
   labels are missing from this extract; strategies include VEC_CONCAT
   for 2-element modes, shufps/pshufd dances for 4-element modes, a
   vec_merge when the hardware supports it, and finally a spill to a
   stack temporary.  */
18317 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18319 enum machine_mode mode = GET_MODE (target);
18320 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18321 bool use_vec_merge = false;
/* 2-element: extract the untouched element, then concat in order.  */
18330 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18331 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18333 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18335 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18336 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18346 /* For the two element vectors, we implement a VEC_CONCAT with
18347 the extraction of the other element. */
18349 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18350 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18353 op0 = val, op1 = tmp;
18355 op0 = tmp, op1 = val;
18357 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18358 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18366 use_vec_merge = true;
/* V4SF, ELT == 1: save target, get X into lane 0, shuffle back so X
   lands in lane 1 with A/C/D restored.  */
18370 /* tmp = target = A B C D */
18371 tmp = copy_to_reg (target);
18372 /* target = A A B B */
18373 emit_insn (gen_sse_unpcklps (target, target, target));
18374 /* target = X A B B */
18375 ix86_expand_vector_set (false, target, val, 0);
18376 /* target = A X C D */
18377 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18378 GEN_INT (1), GEN_INT (0),
18379 GEN_INT (2+4), GEN_INT (3+4)));
18383 /* tmp = target = A B C D */
18384 tmp = copy_to_reg (target);
18385 /* tmp = X B C D */
18386 ix86_expand_vector_set (false, tmp, val, 0);
18387 /* target = A B X D */
18388 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18389 GEN_INT (0), GEN_INT (1),
18390 GEN_INT (0+4), GEN_INT (3+4)));
18394 /* tmp = target = A B C D */
18395 tmp = copy_to_reg (target);
18396 /* tmp = X B C D */
18397 ix86_expand_vector_set (false, tmp, val, 0);
18398 /* target = A B X D */
18399 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18400 GEN_INT (0), GEN_INT (1),
18401 GEN_INT (2+4), GEN_INT (0+4)));
18405 gcc_unreachable ();
18410 /* Element 0 handled by vec_merge below. */
18413 use_vec_merge = true;
18419 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18420 store into element 0, then shuffle them back. */
/* Build a pshufd order that exchanges lanes 0 and ELT; applying the
   same order twice restores the original arrangement.  */
18424 order[0] = GEN_INT (elt);
18425 order[1] = const1_rtx;
18426 order[2] = const2_rtx;
18427 order[3] = GEN_INT (3);
18428 order[elt] = const0_rtx;
18430 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18431 order[1], order[2], order[3]));
18433 ix86_expand_vector_set (false, target, val, 0);
18435 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18436 order[1], order[2], order[3]));
18440 /* For SSE1, we have to reuse the V4SF code. */
18441 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18442 gen_lowpart (SFmode, val), elt);
18447 use_vec_merge = TARGET_SSE2;
18450 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
/* Hardware insert: duplicate VAL and merge only lane ELT.  */
18461 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18462 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18463 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: round-trip through a stack slot.  */
18467 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18469 emit_move_insn (mem, target);
18471 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18472 emit_move_insn (tmp, val);
18474 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  Mode switch
   labels are missing from this extract; strategies are shufps/pshufd or
   unpack-high to bring the element to lane 0 plus a VEC_SELECT, with a
   stack-temporary fallback.  */
18479 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18481 enum machine_mode mode = GET_MODE (vec);
18482 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18483 bool use_vec_extr = false;
18496 use_vec_extr = true;
/* V4SF: broadcast the wanted lane to lane 0 via shufps...  */
18508 tmp = gen_reg_rtx (mode);
18509 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18510 GEN_INT (elt), GEN_INT (elt),
18511 GEN_INT (elt+4), GEN_INT (elt+4)));
/* ...or via unpckhps for the high half.  */
18515 tmp = gen_reg_rtx (mode);
18516 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18520 gcc_unreachable ();
18523 use_vec_extr = true;
/* V4SI: same idea with the integer shuffles.  */
18538 tmp = gen_reg_rtx (mode);
18539 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18540 GEN_INT (elt), GEN_INT (elt),
18541 GEN_INT (elt), GEN_INT (elt)));
18545 tmp = gen_reg_rtx (mode);
18546 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18550 gcc_unreachable ();
18553 use_vec_extr = true;
18558 /* For SSE1, we have to reuse the V4SF code. */
18559 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18560 gen_lowpart (V4SFmode, vec), elt);
18566 use_vec_extr = TARGET_SSE2;
18569 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18574 /* ??? Could extract the appropriate HImode element and shift. */
18581 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18582 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18584 /* Let the rtl optimizers know about the zero extension performed. */
18585 if (inner_mode == HImode)
18587 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18588 target = gen_lowpart (SImode, target);
18591 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill the vector and load the element from memory.  */
18595 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18597 emit_move_insn (mem, vec);
18599 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18600 emit_move_insn (target, tmp);
18604 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18605 pattern to reduce; DEST is the destination; IN is the input vector. */
/* Log-step reduction: movhlps folds the high pair onto the low pair,
   FN combines them, then a shufps brings lane 1 down for the final FN.  */
18608 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18610 rtx tmp1, tmp2, tmp3;
18612 tmp1 = gen_reg_rtx (V4SFmode);
18613 tmp2 = gen_reg_rtx (V4SFmode);
18614 tmp3 = gen_reg_rtx (V4SFmode);
18616 emit_insn (gen_sse_movhlps (tmp1, in, in));
18617 emit_insn (fn (tmp2, tmp1, in));
18619 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18620 GEN_INT (1), GEN_INT (1),
18621 GEN_INT (1+4), GEN_INT (1+4)));
18622 emit_insn (fn (dest, tmp2, tmp3));
18625 /* Target hook for scalar_mode_supported_p. */
/* Accepts decimal float modes specially (the return for that branch is
   missing from this extract); everything else defers to the default.  */
18627 ix86_scalar_mode_supported_p (enum machine_mode mode)
18629 if (DECIMAL_FLOAT_MODE_P (mode))
18632 return default_scalar_mode_supported_p (mode);
18635 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when any enabled ISA extension (SSE, SSE2,
   MMX, 3DNow!) provides it.  */
18637 ix86_vector_mode_supported_p (enum machine_mode mode)
18639 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18641 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18643 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18645 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18650 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18652 We do this in the new i386 backend to maintain source compatibility
18653 with the old cc0-based compiler. */
/* Prepends the implicit asm clobbers "flags", "fpsr" and "dirflag"
   to the user-supplied clobber list.  */
18656 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18657 tree inputs ATTRIBUTE_UNUSED,
18660 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18662 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18664 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18669 /* Return true if this goes in small data/bss. */
/* Despite the (upstream) comment above, this tests the medium code
   model's *large* data placement: only variables in a medium-model
   compilation, either placed in .ldata/.lbss explicitly or larger than
   ix86_section_threshold (or of unknown size).  NOTE(review): the
   return statements are missing from this extract.  */
18672 ix86_in_large_data_p (tree exp)
18674 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18677 /* Functions are never large data. */
18678 if (TREE_CODE (exp) == FUNCTION_DECL)
18681 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18683 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18684 if (strcmp (section, ".ldata") == 0
18685 || strcmp (section, ".lbss") == 0)
18691 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18693 /* If this is an incomplete type with size 0, then we can't put it
18694 in data because it might be too big when completed. */
18695 if (!size || size > ix86_section_threshold)
18702 ix86_encode_section_info (tree decl, rtx rtl, int first)
18704 default_encode_section_info (decl, rtl, first);
18706 if (TREE_CODE (decl) == VAR_DECL
18707 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18708 && ix86_in_large_data_p (decl))
18709 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
18712 /* Worker function for REVERSE_CONDITION. */
18715 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
18717 return (mode != CCFPmode && mode != CCFPUmode
18718 ? reverse_condition (code)
18719 : reverse_condition_maybe_unordered (code));
18722 /* Output code to perform an x87 FP register move, from OPERANDS[1]
18726 output_387_reg_move (rtx insn, rtx *operands)
18728 if (REG_P (operands[1])
18729 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18731 if (REGNO (operands[0]) == FIRST_STACK_REG
18732 && TARGET_USE_FFREEP)
18733 return "ffreep\t%y0";
18734 return "fstp\t%y0";
18736 if (STACK_TOP_P (operands[0]))
18737 return "fld%z1\t%y1";
18741 /* Output code to perform a conditional jump to LABEL, if C2 flag in
18742 FP status register is set. */
18745 ix86_emit_fp_unordered_jump (rtx label)
18747 rtx reg = gen_reg_rtx (HImode);
18750 emit_insn (gen_x86_fnstsw_1 (reg));
18752 if (TARGET_USE_SAHF)
18754 emit_insn (gen_x86_sahf_1 (reg));
18756 temp = gen_rtx_REG (CCmode, FLAGS_REG);
18757 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
18761 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
18763 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18764 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
18767 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
18768 gen_rtx_LABEL_REF (VOIDmode, label),
18770 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
18771 emit_jump_insn (temp);
18774 /* Output code to perform a log1p XFmode calculation. */
18776 void ix86_emit_i387_log1p (rtx op0, rtx op1)
18778 rtx label1 = gen_label_rtx ();
18779 rtx label2 = gen_label_rtx ();
18781 rtx tmp = gen_reg_rtx (XFmode);
18782 rtx tmp2 = gen_reg_rtx (XFmode);
18784 emit_insn (gen_absxf2 (tmp, op1));
18785 emit_insn (gen_cmpxf (tmp,
18786 CONST_DOUBLE_FROM_REAL_VALUE (
18787 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
18789 emit_jump_insn (gen_bge (label1));
18791 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18792 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
18793 emit_jump (label2);
18795 emit_label (label1);
18796 emit_move_insn (tmp, CONST1_RTX (XFmode));
18797 emit_insn (gen_addxf3 (tmp, op1, tmp));
18798 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18799 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
18801 emit_label (label2);
18804 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
18807 i386_solaris_elf_named_section (const char *name, unsigned int flags,
18810 /* With Binutils 2.15, the "@unwind" marker must be specified on
18811 every occurrence of the ".eh_frame" section, not just the first
18814 && strcmp (name, ".eh_frame") == 0)
18816 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18817 flags & SECTION_WRITE ? "aw" : "a");
18820 default_elf_asm_named_section (name, flags, decl);
18823 /* Return the mangling of TYPE if it is an extended fundamental type. */
18825 static const char *
18826 ix86_mangle_fundamental_type (tree type)
18828 switch (TYPE_MODE (type))
18831 /* __float128 is "g". */
18834 /* "long double" or __float80 is "e". */
18841 /* For 32-bit code we can save PIC register setup by using
18842 __stack_chk_fail_local hidden function instead of calling
18843 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
18844 register, so it is better to call __stack_chk_fail directly. */
18847 ix86_stack_protect_fail (void)
18849 return TARGET_64BIT
18850 ? default_external_stack_protect_fail ()
18851 : default_hidden_stack_protect_fail ();
18854 /* Select a format to encode pointers in exception handling data. CODE
18855 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
18856 true if the symbol may be affected by dynamic relocations.
18858 ??? All x86 object file formats are capable of representing this.
18859 After all, the relocation needed is the same as for the call insn.
18860 Whether or not a particular assembler allows us to enter such, I
18861 guess we'll have to see. */
18863 asm_preferred_eh_data_format (int code, int global)
18867 int type = DW_EH_PE_sdata8;
18869 || ix86_cmodel == CM_SMALL_PIC
18870 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
18871 type = DW_EH_PE_sdata4;
18872 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
18874 if (ix86_cmodel == CM_SMALL
18875 || (ix86_cmodel == CM_MEDIUM && code))
18876 return DW_EH_PE_udata4;
18877 return DW_EH_PE_absptr;
18880 #include "gt-i386.h"