1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
58 /* Return index of given mode in mult and division cost tables. */
59 #define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
66 /* Processor costs (relative to an add) */
67 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
68 #define COSTS_N_BYTES(N) ((N) * 2)
71 struct processor_costs size_cost = { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
123 /* Processor costs (relative to an add) */
125 struct processor_costs i386_cost = { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
178 struct processor_costs i486_cost = { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
231 struct processor_costs pentium_cost = {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
284 struct processor_costs pentiumpro_cost = {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
337 struct processor_costs k6_cost = {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (2), /* cost of a lea instruction */
340 COSTS_N_INSNS (1), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (3), /* HI */
344 COSTS_N_INSNS (3), /* SI */
345 COSTS_N_INSNS (3), /* DI */
346 COSTS_N_INSNS (3)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (18), /* HI */
350 COSTS_N_INSNS (18), /* SI */
351 COSTS_N_INSNS (18), /* DI */
352 COSTS_N_INSNS (18)}, /* other */
353 COSTS_N_INSNS (2), /* cost of movsx */
354 COSTS_N_INSNS (2), /* cost of movzx */
355 8, /* "large" insn */
357 3, /* cost for loading QImode using movzbl */
358 {4, 5, 4}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {2, 3, 2}, /* cost of storing integer registers */
362 4, /* cost of reg,reg fld/fst */
363 {6, 6, 6}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 4, 4}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
367 2, /* cost of moving MMX register */
368 {2, 2}, /* cost of loading MMX registers
369 in SImode and DImode */
370 {2, 2}, /* cost of storing MMX registers
371 in SImode and DImode */
372 2, /* cost of moving SSE register */
373 {2, 2, 8}, /* cost of loading SSE registers
374 in SImode, DImode and TImode */
375 {2, 2, 8}, /* cost of storing SSE registers
376 in SImode, DImode and TImode */
377 6, /* MMX or SSE register to integer */
378 32, /* size of prefetch block */
379 1, /* number of parallel prefetches */
381 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
382 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
383 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
384 COSTS_N_INSNS (2), /* cost of FABS instruction. */
385 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
386 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
390 struct processor_costs athlon_cost = {
391 COSTS_N_INSNS (1), /* cost of an add instruction */
392 COSTS_N_INSNS (2), /* cost of a lea instruction */
393 COSTS_N_INSNS (1), /* variable shift costs */
394 COSTS_N_INSNS (1), /* constant shift costs */
395 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
396 COSTS_N_INSNS (5), /* HI */
397 COSTS_N_INSNS (5), /* SI */
398 COSTS_N_INSNS (5), /* DI */
399 COSTS_N_INSNS (5)}, /* other */
400 0, /* cost of multiply per each bit set */
401 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
402 COSTS_N_INSNS (26), /* HI */
403 COSTS_N_INSNS (42), /* SI */
404 COSTS_N_INSNS (74), /* DI */
405 COSTS_N_INSNS (74)}, /* other */
406 COSTS_N_INSNS (1), /* cost of movsx */
407 COSTS_N_INSNS (1), /* cost of movzx */
408 8, /* "large" insn */
410 4, /* cost for loading QImode using movzbl */
411 {3, 4, 3}, /* cost of loading integer registers
412 in QImode, HImode and SImode.
413 Relative to reg-reg move (2). */
414 {3, 4, 3}, /* cost of storing integer registers */
415 4, /* cost of reg,reg fld/fst */
416 {4, 4, 12}, /* cost of loading fp registers
417 in SFmode, DFmode and XFmode */
418 {6, 6, 8}, /* cost of storing fp registers
419 in SFmode, DFmode and XFmode */
420 2, /* cost of moving MMX register */
421 {4, 4}, /* cost of loading MMX registers
422 in SImode and DImode */
423 {4, 4}, /* cost of storing MMX registers
424 in SImode and DImode */
425 2, /* cost of moving SSE register */
426 {4, 4, 6}, /* cost of loading SSE registers
427 in SImode, DImode and TImode */
428 {4, 4, 5}, /* cost of storing SSE registers
429 in SImode, DImode and TImode */
430 5, /* MMX or SSE register to integer */
431 64, /* size of prefetch block */
432 6, /* number of parallel prefetches */
434 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
435 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
436 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
437 COSTS_N_INSNS (2), /* cost of FABS instruction. */
438 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
439 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
443 struct processor_costs k8_cost = {
444 COSTS_N_INSNS (1), /* cost of an add instruction */
445 COSTS_N_INSNS (2), /* cost of a lea instruction */
446 COSTS_N_INSNS (1), /* variable shift costs */
447 COSTS_N_INSNS (1), /* constant shift costs */
448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
449 COSTS_N_INSNS (4), /* HI */
450 COSTS_N_INSNS (3), /* SI */
451 COSTS_N_INSNS (4), /* DI */
452 COSTS_N_INSNS (5)}, /* other */
453 0, /* cost of multiply per each bit set */
454 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
455 COSTS_N_INSNS (26), /* HI */
456 COSTS_N_INSNS (42), /* SI */
457 COSTS_N_INSNS (74), /* DI */
458 COSTS_N_INSNS (74)}, /* other */
459 COSTS_N_INSNS (1), /* cost of movsx */
460 COSTS_N_INSNS (1), /* cost of movzx */
461 8, /* "large" insn */
463 4, /* cost for loading QImode using movzbl */
464 {3, 4, 3}, /* cost of loading integer registers
465 in QImode, HImode and SImode.
466 Relative to reg-reg move (2). */
467 {3, 4, 3}, /* cost of storing integer registers */
468 4, /* cost of reg,reg fld/fst */
469 {4, 4, 12}, /* cost of loading fp registers
470 in SFmode, DFmode and XFmode */
471 {6, 6, 8}, /* cost of storing fp registers
472 in SFmode, DFmode and XFmode */
473 2, /* cost of moving MMX register */
474 {3, 3}, /* cost of loading MMX registers
475 in SImode and DImode */
476 {4, 4}, /* cost of storing MMX registers
477 in SImode and DImode */
478 2, /* cost of moving SSE register */
479 {4, 3, 6}, /* cost of loading SSE registers
480 in SImode, DImode and TImode */
481 {4, 4, 5}, /* cost of storing SSE registers
482 in SImode, DImode and TImode */
483 5, /* MMX or SSE register to integer */
484 64, /* size of prefetch block */
485 6, /* number of parallel prefetches */
487 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (2), /* cost of FABS instruction. */
491 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
496 struct processor_costs pentium4_cost = {
497 COSTS_N_INSNS (1), /* cost of an add instruction */
498 COSTS_N_INSNS (3), /* cost of a lea instruction */
499 COSTS_N_INSNS (4), /* variable shift costs */
500 COSTS_N_INSNS (4), /* constant shift costs */
501 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
502 COSTS_N_INSNS (15), /* HI */
503 COSTS_N_INSNS (15), /* SI */
504 COSTS_N_INSNS (15), /* DI */
505 COSTS_N_INSNS (15)}, /* other */
506 0, /* cost of multiply per each bit set */
507 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
508 COSTS_N_INSNS (56), /* HI */
509 COSTS_N_INSNS (56), /* SI */
510 COSTS_N_INSNS (56), /* DI */
511 COSTS_N_INSNS (56)}, /* other */
512 COSTS_N_INSNS (1), /* cost of movsx */
513 COSTS_N_INSNS (1), /* cost of movzx */
514 16, /* "large" insn */
516 2, /* cost for loading QImode using movzbl */
517 {4, 5, 4}, /* cost of loading integer registers
518 in QImode, HImode and SImode.
519 Relative to reg-reg move (2). */
520 {2, 3, 2}, /* cost of storing integer registers */
521 2, /* cost of reg,reg fld/fst */
522 {2, 2, 6}, /* cost of loading fp registers
523 in SFmode, DFmode and XFmode */
524 {4, 4, 6}, /* cost of storing fp registers
525 in SFmode, DFmode and XFmode */
526 2, /* cost of moving MMX register */
527 {2, 2}, /* cost of loading MMX registers
528 in SImode and DImode */
529 {2, 2}, /* cost of storing MMX registers
530 in SImode and DImode */
531 12, /* cost of moving SSE register */
532 {12, 12, 12}, /* cost of loading SSE registers
533 in SImode, DImode and TImode */
534 {2, 2, 8}, /* cost of storing SSE registers
535 in SImode, DImode and TImode */
536 10, /* MMX or SSE register to integer */
537 64, /* size of prefetch block */
538 6, /* number of parallel prefetches */
540 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
541 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
542 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
543 COSTS_N_INSNS (2), /* cost of FABS instruction. */
544 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
545 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
549 struct processor_costs nocona_cost = {
550 COSTS_N_INSNS (1), /* cost of an add instruction */
551 COSTS_N_INSNS (1), /* cost of a lea instruction */
552 COSTS_N_INSNS (1), /* variable shift costs */
553 COSTS_N_INSNS (1), /* constant shift costs */
554 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
555 COSTS_N_INSNS (10), /* HI */
556 COSTS_N_INSNS (10), /* SI */
557 COSTS_N_INSNS (10), /* DI */
558 COSTS_N_INSNS (10)}, /* other */
559 0, /* cost of multiply per each bit set */
560 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
561 COSTS_N_INSNS (66), /* HI */
562 COSTS_N_INSNS (66), /* SI */
563 COSTS_N_INSNS (66), /* DI */
564 COSTS_N_INSNS (66)}, /* other */
565 COSTS_N_INSNS (1), /* cost of movsx */
566 COSTS_N_INSNS (1), /* cost of movzx */
567 16, /* "large" insn */
569 4, /* cost for loading QImode using movzbl */
570 {4, 4, 4}, /* cost of loading integer registers
571 in QImode, HImode and SImode.
572 Relative to reg-reg move (2). */
573 {4, 4, 4}, /* cost of storing integer registers */
574 3, /* cost of reg,reg fld/fst */
575 {12, 12, 12}, /* cost of loading fp registers
576 in SFmode, DFmode and XFmode */
577 {4, 4, 4}, /* cost of storing fp registers
578 in SFmode, DFmode and XFmode */
579 6, /* cost of moving MMX register */
580 {12, 12}, /* cost of loading MMX registers
581 in SImode and DImode */
582 {12, 12}, /* cost of storing MMX registers
583 in SImode and DImode */
584 6, /* cost of moving SSE register */
585 {12, 12, 12}, /* cost of loading SSE registers
586 in SImode, DImode and TImode */
587 {12, 12, 12}, /* cost of storing SSE registers
588 in SImode, DImode and TImode */
589 8, /* MMX or SSE register to integer */
590 128, /* size of prefetch block */
591 8, /* number of parallel prefetches */
593 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
594 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
595 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
596 COSTS_N_INSNS (3), /* cost of FABS instruction. */
597 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
598 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
601 /* Generic64 should produce code tuned for Nocona and K8. */
603 struct processor_costs generic64_cost = {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 /* On all chips taken into consideration lea is 2 cycles and more. With
606 this cost however our current implementation of synth_mult results in
607 use of unnecessary temporary registers causing regression on several
608 SPECfp benchmarks. */
609 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (2)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (26), /* HI */
620 COSTS_N_INSNS (42), /* SI */
621 COSTS_N_INSNS (74), /* DI */
622 COSTS_N_INSNS (74)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {4, 4, 4}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {4, 4, 4}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {12, 12, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {8, 8}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {8, 8}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {8, 8, 8}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {8, 8, 8}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 5, /* MMX or SSE register to integer */
648 64, /* size of prefetch block */
649 6, /* number of parallel prefetches */
650 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
651 is increased to perhaps more appropriate value of 5. */
653 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (8), /* cost of FABS instruction. */
657 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
661 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
663 struct processor_costs generic32_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
666 COSTS_N_INSNS (1), /* variable shift costs */
667 COSTS_N_INSNS (1), /* constant shift costs */
668 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
669 COSTS_N_INSNS (4), /* HI */
670 COSTS_N_INSNS (3), /* SI */
671 COSTS_N_INSNS (4), /* DI */
672 COSTS_N_INSNS (2)}, /* other */
673 0, /* cost of multiply per each bit set */
674 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
675 COSTS_N_INSNS (26), /* HI */
676 COSTS_N_INSNS (42), /* SI */
677 COSTS_N_INSNS (74), /* DI */
678 COSTS_N_INSNS (74)}, /* other */
679 COSTS_N_INSNS (1), /* cost of movsx */
680 COSTS_N_INSNS (1), /* cost of movzx */
681 8, /* "large" insn */
683 4, /* cost for loading QImode using movzbl */
684 {4, 4, 4}, /* cost of loading integer registers
685 in QImode, HImode and SImode.
686 Relative to reg-reg move (2). */
687 {4, 4, 4}, /* cost of storing integer registers */
688 4, /* cost of reg,reg fld/fst */
689 {12, 12, 12}, /* cost of loading fp registers
690 in SFmode, DFmode and XFmode */
691 {6, 6, 8}, /* cost of storing fp registers
692 in SFmode, DFmode and XFmode */
693 2, /* cost of moving MMX register */
694 {8, 8}, /* cost of loading MMX registers
695 in SImode and DImode */
696 {8, 8}, /* cost of storing MMX registers
697 in SImode and DImode */
698 2, /* cost of moving SSE register */
699 {8, 8, 8}, /* cost of loading SSE registers
700 in SImode, DImode and TImode */
701 {8, 8, 8}, /* cost of storing SSE registers
702 in SImode, DImode and TImode */
703 5, /* MMX or SSE register to integer */
704 64, /* size of prefetch block */
705 6, /* number of parallel prefetches */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
715 const struct processor_costs *ix86_cost = &pentium_cost;
717 /* Processor feature/optimization bitmasks. */
718 #define m_386 (1<<PROCESSOR_I386)
719 #define m_486 (1<<PROCESSOR_I486)
720 #define m_PENT (1<<PROCESSOR_PENTIUM)
721 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
722 #define m_K6 (1<<PROCESSOR_K6)
723 #define m_ATHLON (1<<PROCESSOR_ATHLON)
724 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
725 #define m_K8 (1<<PROCESSOR_K8)
726 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
727 #define m_NOCONA (1<<PROCESSOR_NOCONA)
728 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
729 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
730 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
732 /* Generic instruction choice should be common subset of supported CPUs
733 (PPro/PENT4/NOCONA/Athlon/K8). */
735 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
736 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
737 generic because it is not working well with PPro base chips. */
738 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
739 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
740 const int x86_zero_extend_with_and = m_486 | m_PENT;
741 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
742 const int x86_double_with_add = ~m_386;
743 const int x86_use_bit_test = m_386;
744 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
745 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
746 const int x86_3dnow_a = m_ATHLON_K8;
747 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
748 /* Branch hints were put in P4 based on simulation result. But
749 after P4 was made, no performance benefit was observed with
750 branch hints. It also increases the code size. As the result,
751 icc never generates branch hints. */
752 const int x86_branch_hints = 0;
753 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
754 /* We probably ought to watch for partial register stalls on Generic32
755 compilation setting as well. However in current implementation the
756 partial register stalls are not eliminated very well - they can
757 be introduced via subregs synthesized by combine and can happen
758 in caller/callee saving sequences.
759 Because this option pays back little on PPro based chips and is in conflict
760 with partial reg. dependencies used by Athlon/P4 based chips, it is better
761 to leave it off for generic32 for now. */
762 const int x86_partial_reg_stall = m_PPRO;
763 const int x86_partial_flag_reg_stall = m_GENERIC;
764 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
765 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
766 const int x86_use_mov0 = m_K6;
767 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
768 const int x86_read_modify_write = ~m_PENT;
769 const int x86_read_modify = ~(m_PENT | m_PPRO);
770 const int x86_split_long_moves = m_PPRO;
771 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
772 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
773 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
774 const int x86_qimode_math = ~(0);
775 const int x86_promote_qi_regs = 0;
776 /* On PPro this flag is meant to avoid partial register stalls. Just like
777 the x86_partial_reg_stall this option might be considered for Generic32
778 if our scheme for avoiding partial stalls was more effective. */
779 const int x86_himode_math = ~(m_PPRO);
780 const int x86_promote_hi_regs = m_PPRO;
781 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
782 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
783 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
784 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
785 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
786 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
787 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
788 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
789 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
790 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
791 const int x86_shift1 = ~m_486;
792 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
793 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
794 that thread 128bit SSE registers as single units versus K8 based chips that
795 divide SSE registers to two 64bit halves.
796 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
797 to allow register renaming on 128bit SSE units, but usually results in one
798 extra microop on 64bit SSE units. Experimental results shows that disabling
799 this option on P4 brings over 20% SPECfp regression, while enabling it on
800 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
802 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
803 /* Set for machines where the type and dependencies are resolved on SSE
804 register parts instead of whole registers, so we may maintain just
805 lower part of scalar values in proper format leaving the upper part
807 const int x86_sse_split_regs = m_ATHLON_K8;
808 const int x86_sse_typeless_stores = m_ATHLON_K8;
809 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
810 const int x86_use_ffreep = m_ATHLON_K8;
811 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
812 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
814 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
815 integer data in xmm registers. Which results in pretty abysmal code. */
816 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
818 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
819 /* Some CPU cores are not able to predict more than 4 branch instructions in
820 the 16 byte window. */
821 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
822 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
823 const int x86_use_bt = m_ATHLON_K8;
824 /* Compare and exchange was added for 80486. */
825 const int x86_cmpxchg = ~m_386;
826 /* Compare and exchange 8 bytes was added for pentium. */
827 const int x86_cmpxchg8b = ~(m_386 | m_486);
828 /* Compare and exchange 16 bytes was added for nocona. */
829 const int x86_cmpxchg16b = m_NOCONA;
830 /* Exchange and add was added for 80486. */
831 const int x86_xadd = ~m_386;
832 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
834 /* In case the average insn count for single function invocation is
835 lower than this constant, emit fast (but longer) prologue and
837 #define FAST_PROLOGUE_INSN_COUNT 20
839 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
840 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
841 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
842 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
844 /* Array of the smallest class containing reg number REGNO, indexed by
845 REGNO. Used by REGNO_REG_CLASS in i386.h. */
847 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
850 AREG, DREG, CREG, BREG,
852 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
854 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
855 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
858 /* flags, fpsr, dirflag, frame */
859 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
860 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
862 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
864 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
865 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
866 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
870 /* The "default" register map used in 32bit mode. */
872 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
874 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
875 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
876 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
877 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
878 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
879 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
880 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
883 static int const x86_64_int_parameter_registers[6] =
885 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
886 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
889 static int const x86_64_int_return_registers[4] =
891 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
894 /* The "default" register map used in 64bit mode. */
895 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
897 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
898 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
899 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
900 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
901 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
902 8,9,10,11,12,13,14,15, /* extended integer registers */
903 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
906 /* Define the register numbers to be used in Dwarf debugging information.
907 The SVR4 reference port C compiler uses the following register numbers
908 in its Dwarf output code:
909 0 for %eax (gcc regno = 0)
910 1 for %ecx (gcc regno = 2)
911 2 for %edx (gcc regno = 1)
912 3 for %ebx (gcc regno = 3)
913 4 for %esp (gcc regno = 7)
914 5 for %ebp (gcc regno = 6)
915 6 for %esi (gcc regno = 4)
916 7 for %edi (gcc regno = 5)
917 The following three DWARF register numbers are never generated by
918 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
919 believes these numbers have these meanings.
920 8 for %eip (no gcc equivalent)
921 9 for %eflags (gcc regno = 17)
922 10 for %trapno (no gcc equivalent)
923 It is not at all clear how we should number the FP stack registers
924 for the x86 architecture. If the version of SDB on x86/svr4 were
925 a bit less brain dead with respect to floating-point then we would
926 have a precedent to follow with respect to DWARF register numbers
927 for x86 FP registers, but the SDB on x86/svr4 is so completely
928 broken with respect to FP registers that it is hardly worth thinking
929 of it as something to strive for compatibility with.
930 The version of x86/svr4 SDB I have at the moment does (partially)
931 seem to believe that DWARF register number 11 is associated with
932 the x86 register %st(0), but that's about all. Higher DWARF
933 register numbers don't seem to be associated with anything in
934 particular, and even for DWARF regno 11, SDB only seems to under-
935 stand that it should say that a variable lives in %st(0) (when
936 asked via an `=' command) if we said it was in DWARF regno 11,
937 but SDB still prints garbage when asked for the value of the
938 variable in question (via a `/' command).
939 (Also note that the labels SDB prints for various FP stack regs
940 when doing an `x' command are all wrong.)
941 Note that these problems generally don't affect the native SVR4
942 C compiler because it doesn't allow the use of -O with -g and
943 because when it is *not* optimizing, it allocates a memory
944 location for each floating-point variable, and the memory
945 location is what gets described in the DWARF AT_location
946 attribute for the variable in question.
947 Regardless of the severe mental illness of the x86/svr4 SDB, we
948 do something sensible here and we use the following DWARF
949 register numbers. Note that these are all stack-top-relative
951 11 for %st(0) (gcc regno = 8)
952 12 for %st(1) (gcc regno = 9)
953 13 for %st(2) (gcc regno = 10)
954 14 for %st(3) (gcc regno = 11)
955 15 for %st(4) (gcc regno = 12)
956 16 for %st(5) (gcc regno = 13)
957 17 for %st(6) (gcc regno = 14)
958 18 for %st(7) (gcc regno = 15)
960 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
962 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
963 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
964 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
965 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
966 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
967 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
968 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
971 /* Test and compare insns in i386.md store the information needed to
972 generate branch and scc insns here. */
974 rtx ix86_compare_op0 = NULL_RTX;
975 rtx ix86_compare_op1 = NULL_RTX;
976 rtx ix86_compare_emitted = NULL_RTX;
978 /* Size of the register save area. */
979 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
981 /* Define the structure for the machine field in struct function. */
983 struct stack_local_entry GTY(())
988 struct stack_local_entry *next;
991 /* Structure describing stack frame layout.
992 Stack grows downward:
998 saved frame pointer if frame_pointer_needed
999 <- HARD_FRAME_POINTER
1004 [va_arg registers] (
1005 > to_allocate <- FRAME_POINTER
1015 HOST_WIDE_INT frame;
1017 int outgoing_arguments_size;
1020 HOST_WIDE_INT to_allocate;
1021 /* The offsets relative to ARG_POINTER. */
1022 HOST_WIDE_INT frame_pointer_offset;
1023 HOST_WIDE_INT hard_frame_pointer_offset;
1024 HOST_WIDE_INT stack_pointer_offset;
1026 /* When save_regs_using_mov is set, emit prologue using
1027 move instead of push instructions. */
1028 bool save_regs_using_mov;
1031 /* Code model option. */
1032 enum cmodel ix86_cmodel;
1034 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1036 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1038 /* Which unit we are generating floating point math for. */
1039 enum fpmath_unit ix86_fpmath;
1041 /* Which cpu are we scheduling for. */
1042 enum processor_type ix86_tune;
1043 /* Which instruction set architecture to use. */
1044 enum processor_type ix86_arch;
1046 /* true if sse prefetch instruction is not NOOP. */
1047 int x86_prefetch_sse;
1049 /* ix86_regparm_string as a number */
1050 static int ix86_regparm;
1052 /* -mstackrealign option */
1053 extern int ix86_force_align_arg_pointer;
1054 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1056 /* Preferred alignment for stack boundary in bits. */
1057 unsigned int ix86_preferred_stack_boundary;
1059 /* Values 1-5: see jump.c */
1060 int ix86_branch_cost;
1062 /* Variables which are this size or smaller are put in the data/bss
1063 or ldata/lbss sections. */
1065 int ix86_section_threshold = 65536;
1067 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1068 char internal_label_prefix[16];
1069 int internal_label_prefix_len;
1071 static bool ix86_handle_option (size_t, const char *, int);
1072 static void output_pic_addr_const (FILE *, rtx, int);
1073 static void put_condition_code (enum rtx_code, enum machine_mode,
1075 static const char *get_some_local_dynamic_name (void);
1076 static int get_some_local_dynamic_name_1 (rtx *, void *);
1077 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1078 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1080 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1081 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1083 static rtx get_thread_pointer (int);
1084 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1085 static void get_pc_thunk_name (char [32], unsigned int);
1086 static rtx gen_push (rtx);
1087 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1088 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1089 static struct machine_function * ix86_init_machine_status (void);
1090 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1091 static int ix86_nsaved_regs (void);
1092 static void ix86_emit_save_regs (void);
1093 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1094 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1095 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1096 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1097 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1098 static rtx ix86_expand_aligntest (rtx, int);
1099 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1100 static int ix86_issue_rate (void);
1101 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1102 static int ia32_multipass_dfa_lookahead (void);
1103 static void ix86_init_mmx_sse_builtins (void);
1104 static rtx x86_this_parameter (tree);
1105 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1106 HOST_WIDE_INT, tree);
1107 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1108 static void x86_file_start (void);
1109 static void ix86_reorg (void);
1110 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1111 static tree ix86_build_builtin_va_list (void);
1112 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1114 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1115 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1116 static bool ix86_vector_mode_supported_p (enum machine_mode);
1118 static int ix86_address_cost (rtx);
1119 static bool ix86_cannot_force_const_mem (rtx);
1120 static rtx ix86_delegitimize_address (rtx);
1122 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1124 struct builtin_description;
1125 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1127 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1129 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1130 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1131 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1132 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1133 static rtx safe_vector_operand (rtx, enum machine_mode);
1134 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1135 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1136 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1137 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1138 static int ix86_fp_comparison_cost (enum rtx_code code);
1139 static unsigned int ix86_select_alt_pic_regnum (void);
1140 static int ix86_save_reg (unsigned int, int);
1141 static void ix86_compute_frame_layout (struct ix86_frame *);
1142 static int ix86_comp_type_attributes (tree, tree);
1143 static int ix86_function_regparm (tree, tree);
1144 const struct attribute_spec ix86_attribute_table[];
1145 static bool ix86_function_ok_for_sibcall (tree, tree);
1146 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1147 static int ix86_value_regno (enum machine_mode, tree, tree);
1148 static bool contains_128bit_aligned_vector_p (tree);
1149 static rtx ix86_struct_value_rtx (tree, int);
1150 static bool ix86_ms_bitfield_layout_p (tree);
1151 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1152 static int extended_reg_mentioned_1 (rtx *, void *);
1153 static bool ix86_rtx_costs (rtx, int, int, int *);
1154 static int min_insn_size (rtx);
1155 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1156 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1157 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1159 static void ix86_init_builtins (void);
1160 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1161 static const char *ix86_mangle_fundamental_type (tree);
1162 static tree ix86_stack_protect_fail (void);
1163 static rtx ix86_internal_arg_pointer (void);
1164 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1166 /* This function is only used on Solaris. */
1167 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1170 /* Register class used for passing given 64bit part of the argument.
1171 These represent classes as documented by the PS ABI, with the exception
1172 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1173 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1175 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1176 whenever possible (upper half does contain padding).
1178 enum x86_64_reg_class
1181 X86_64_INTEGER_CLASS,
1182 X86_64_INTEGERSI_CLASS,
1189 X86_64_COMPLEX_X87_CLASS,
1192 static const char * const x86_64_reg_class_name[] = {
1193 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1194 "sseup", "x87", "x87up", "cplx87", "no"
1197 #define MAX_CLASSES 4
1199 /* Table of constants used by fldpi, fldln2, etc.... */
1200 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1201 static bool ext_80387_constants_init = 0;
1202 static void init_ext_80387_constants (void);
1203 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1204 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1205 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1206 static section *x86_64_elf_select_section (tree decl, int reloc,
1207 unsigned HOST_WIDE_INT align)
1210 /* Initialize the GCC target structure. */
1211 #undef TARGET_ATTRIBUTE_TABLE
1212 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1213 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1214 # undef TARGET_MERGE_DECL_ATTRIBUTES
1215 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1218 #undef TARGET_COMP_TYPE_ATTRIBUTES
1219 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1221 #undef TARGET_INIT_BUILTINS
1222 #define TARGET_INIT_BUILTINS ix86_init_builtins
1223 #undef TARGET_EXPAND_BUILTIN
1224 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1226 #undef TARGET_ASM_FUNCTION_EPILOGUE
1227 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1229 #undef TARGET_ENCODE_SECTION_INFO
1230 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1231 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1233 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1236 #undef TARGET_ASM_OPEN_PAREN
1237 #define TARGET_ASM_OPEN_PAREN ""
1238 #undef TARGET_ASM_CLOSE_PAREN
1239 #define TARGET_ASM_CLOSE_PAREN ""
1241 #undef TARGET_ASM_ALIGNED_HI_OP
1242 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1243 #undef TARGET_ASM_ALIGNED_SI_OP
1244 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1246 #undef TARGET_ASM_ALIGNED_DI_OP
1247 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1250 #undef TARGET_ASM_UNALIGNED_HI_OP
1251 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1252 #undef TARGET_ASM_UNALIGNED_SI_OP
1253 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1254 #undef TARGET_ASM_UNALIGNED_DI_OP
1255 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1257 #undef TARGET_SCHED_ADJUST_COST
1258 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1259 #undef TARGET_SCHED_ISSUE_RATE
1260 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1261 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1262 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1263 ia32_multipass_dfa_lookahead
1265 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1266 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1269 #undef TARGET_HAVE_TLS
1270 #define TARGET_HAVE_TLS true
1272 #undef TARGET_CANNOT_FORCE_CONST_MEM
1273 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1274 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1275 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1277 #undef TARGET_DELEGITIMIZE_ADDRESS
1278 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1280 #undef TARGET_MS_BITFIELD_LAYOUT_P
1281 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1284 #undef TARGET_BINDS_LOCAL_P
1285 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1288 #undef TARGET_ASM_OUTPUT_MI_THUNK
1289 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1290 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1291 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1293 #undef TARGET_ASM_FILE_START
1294 #define TARGET_ASM_FILE_START x86_file_start
1296 #undef TARGET_DEFAULT_TARGET_FLAGS
1297 #define TARGET_DEFAULT_TARGET_FLAGS \
1299 | TARGET_64BIT_DEFAULT \
1300 | TARGET_SUBTARGET_DEFAULT \
1301 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1303 #undef TARGET_HANDLE_OPTION
1304 #define TARGET_HANDLE_OPTION ix86_handle_option
1306 #undef TARGET_RTX_COSTS
1307 #define TARGET_RTX_COSTS ix86_rtx_costs
1308 #undef TARGET_ADDRESS_COST
1309 #define TARGET_ADDRESS_COST ix86_address_cost
1311 #undef TARGET_FIXED_CONDITION_CODE_REGS
1312 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1313 #undef TARGET_CC_MODES_COMPATIBLE
1314 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1316 #undef TARGET_MACHINE_DEPENDENT_REORG
1317 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1319 #undef TARGET_BUILD_BUILTIN_VA_LIST
1320 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1322 #undef TARGET_MD_ASM_CLOBBERS
1323 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1325 #undef TARGET_PROMOTE_PROTOTYPES
1326 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1327 #undef TARGET_STRUCT_VALUE_RTX
1328 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1329 #undef TARGET_SETUP_INCOMING_VARARGS
1330 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1331 #undef TARGET_MUST_PASS_IN_STACK
1332 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1333 #undef TARGET_PASS_BY_REFERENCE
1334 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1335 #undef TARGET_INTERNAL_ARG_POINTER
1336 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1337 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1338 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1340 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1341 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1343 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1344 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1346 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1347 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1350 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1351 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1354 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1355 #undef TARGET_INSERT_ATTRIBUTES
1356 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1359 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1360 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1362 #undef TARGET_STACK_PROTECT_FAIL
1363 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1365 #undef TARGET_FUNCTION_VALUE
1366 #define TARGET_FUNCTION_VALUE ix86_function_value
1368 struct gcc_target targetm = TARGET_INITIALIZER;
1371 /* The svr4 ABI for the i386 says that records and unions are returned
1373 #ifndef DEFAULT_PCC_STRUCT_RETURN
1374 #define DEFAULT_PCC_STRUCT_RETURN 1
1377 /* Implement TARGET_HANDLE_OPTION. */
1380 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1387 target_flags &= ~MASK_3DNOW_A;
1388 target_flags_explicit |= MASK_3DNOW_A;
1395 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1396 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1403 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1404 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1411 target_flags &= ~MASK_SSE3;
1412 target_flags_explicit |= MASK_SSE3;
1421 /* Sometimes certain combinations of command options do not make
1422 sense on a particular target machine. You can define a macro
1423 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1424 defined, is executed once just after all the command options have
1427 Don't use this macro to turn on various extra optimizations for
1428 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1431 override_options (void)
1434 int ix86_tune_defaulted = 0;
1436 /* Comes from final.c -- no real reason to change it. */
1437 #define MAX_CODE_ALIGN 16
1441 const struct processor_costs *cost; /* Processor costs */
1442 const int target_enable; /* Target flags to enable. */
1443 const int target_disable; /* Target flags to disable. */
1444 const int align_loop; /* Default alignments. */
1445 const int align_loop_max_skip;
1446 const int align_jump;
1447 const int align_jump_max_skip;
1448 const int align_func;
1450 const processor_target_table[PROCESSOR_max] =
1452 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1453 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1454 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1455 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1456 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1457 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1458 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1459 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1460 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1461 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1462 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1465 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1468 const char *const name; /* processor name or nickname. */
1469 const enum processor_type processor;
1470 const enum pta_flags
1476 PTA_PREFETCH_SSE = 16,
1483 const processor_alias_table[] =
1485 {"i386", PROCESSOR_I386, 0},
1486 {"i486", PROCESSOR_I486, 0},
1487 {"i586", PROCESSOR_PENTIUM, 0},
1488 {"pentium", PROCESSOR_PENTIUM, 0},
1489 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1490 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1491 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1492 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1493 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1494 {"i686", PROCESSOR_PENTIUMPRO, 0},
1495 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1496 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1497 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1498 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1499 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1500 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1501 | PTA_MMX | PTA_PREFETCH_SSE},
1502 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1503 | PTA_MMX | PTA_PREFETCH_SSE},
1504 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1505 | PTA_MMX | PTA_PREFETCH_SSE},
1506 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1507 | PTA_MMX | PTA_PREFETCH_SSE},
1508 {"k6", PROCESSOR_K6, PTA_MMX},
1509 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1510 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1511 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1513 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1514 | PTA_3DNOW | PTA_3DNOW_A},
1515 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1516 | PTA_3DNOW_A | PTA_SSE},
1517 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1518 | PTA_3DNOW_A | PTA_SSE},
1519 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1520 | PTA_3DNOW_A | PTA_SSE},
1521 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1522 | PTA_SSE | PTA_SSE2 },
1523 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1524 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1525 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1526 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1527 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1528 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1529 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1530 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1531 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1532 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1535 int const pta_size = ARRAY_SIZE (processor_alias_table);
1537 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1538 SUBTARGET_OVERRIDE_OPTIONS;
1541 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1542 SUBSUBTARGET_OVERRIDE_OPTIONS;
1545 /* -fPIC is the default for x86_64. */
1546 if (TARGET_MACHO && TARGET_64BIT)
1549 /* Set the default values for switches whose default depends on TARGET_64BIT
1550 in case they weren't overwritten by command line options. */
1553 /* Mach-O doesn't support omitting the frame pointer for now. */
1554 if (flag_omit_frame_pointer == 2)
1555 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1556 if (flag_asynchronous_unwind_tables == 2)
1557 flag_asynchronous_unwind_tables = 1;
1558 if (flag_pcc_struct_return == 2)
1559 flag_pcc_struct_return = 0;
1563 if (flag_omit_frame_pointer == 2)
1564 flag_omit_frame_pointer = 0;
1565 if (flag_asynchronous_unwind_tables == 2)
1566 flag_asynchronous_unwind_tables = 0;
1567 if (flag_pcc_struct_return == 2)
1568 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1571 /* Need to check -mtune=generic first. */
1572 if (ix86_tune_string)
1574 if (!strcmp (ix86_tune_string, "generic")
1575 || !strcmp (ix86_tune_string, "i686")
1576 /* As special support for cross compilers we read -mtune=native
1577 as -mtune=generic. With native compilers we won't see the
1578 -mtune=native, as it was changed by the driver. */
1579 || !strcmp (ix86_tune_string, "native"))
1582 ix86_tune_string = "generic64";
1584 ix86_tune_string = "generic32";
1586 else if (!strncmp (ix86_tune_string, "generic", 7))
1587 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1591 if (ix86_arch_string)
1592 ix86_tune_string = ix86_arch_string;
1593 if (!ix86_tune_string)
1595 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1596 ix86_tune_defaulted = 1;
1599 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1600 need to use a sensible tune option. */
1601 if (!strcmp (ix86_tune_string, "generic")
1602 || !strcmp (ix86_tune_string, "x86-64")
1603 || !strcmp (ix86_tune_string, "i686"))
1606 ix86_tune_string = "generic64";
1608 ix86_tune_string = "generic32";
1611 if (!strcmp (ix86_tune_string, "x86-64"))
1612 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1613 "-mtune=generic instead as appropriate.");
1615 if (!ix86_arch_string)
1616 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1617 if (!strcmp (ix86_arch_string, "generic"))
1618 error ("generic CPU can be used only for -mtune= switch");
1619 if (!strncmp (ix86_arch_string, "generic", 7))
1620 error ("bad value (%s) for -march= switch", ix86_arch_string);
1622 if (ix86_cmodel_string != 0)
1624 if (!strcmp (ix86_cmodel_string, "small"))
1625 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1626 else if (!strcmp (ix86_cmodel_string, "medium"))
1627 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1629 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1630 else if (!strcmp (ix86_cmodel_string, "32"))
1631 ix86_cmodel = CM_32;
1632 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1633 ix86_cmodel = CM_KERNEL;
1634 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1635 ix86_cmodel = CM_LARGE;
1637 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1641 ix86_cmodel = CM_32;
1643 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1645 if (ix86_asm_string != 0)
1648 && !strcmp (ix86_asm_string, "intel"))
1649 ix86_asm_dialect = ASM_INTEL;
1650 else if (!strcmp (ix86_asm_string, "att"))
1651 ix86_asm_dialect = ASM_ATT;
1653 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1655 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1656 error ("code model %qs not supported in the %s bit mode",
1657 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1658 if (ix86_cmodel == CM_LARGE)
1659 sorry ("code model %<large%> not supported yet");
1660 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1661 sorry ("%i-bit mode not compiled in",
1662 (target_flags & MASK_64BIT) ? 64 : 32);
1664 for (i = 0; i < pta_size; i++)
1665 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1667 ix86_arch = processor_alias_table[i].processor;
1668 /* Default cpu tuning to the architecture. */
1669 ix86_tune = ix86_arch;
1670 if (processor_alias_table[i].flags & PTA_MMX
1671 && !(target_flags_explicit & MASK_MMX))
1672 target_flags |= MASK_MMX;
1673 if (processor_alias_table[i].flags & PTA_3DNOW
1674 && !(target_flags_explicit & MASK_3DNOW))
1675 target_flags |= MASK_3DNOW;
1676 if (processor_alias_table[i].flags & PTA_3DNOW_A
1677 && !(target_flags_explicit & MASK_3DNOW_A))
1678 target_flags |= MASK_3DNOW_A;
1679 if (processor_alias_table[i].flags & PTA_SSE
1680 && !(target_flags_explicit & MASK_SSE))
1681 target_flags |= MASK_SSE;
1682 if (processor_alias_table[i].flags & PTA_SSE2
1683 && !(target_flags_explicit & MASK_SSE2))
1684 target_flags |= MASK_SSE2;
1685 if (processor_alias_table[i].flags & PTA_SSE3
1686 && !(target_flags_explicit & MASK_SSE3))
1687 target_flags |= MASK_SSE3;
1688 if (processor_alias_table[i].flags & PTA_SSSE3
1689 && !(target_flags_explicit & MASK_SSSE3))
1690 target_flags |= MASK_SSSE3;
1691 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1692 x86_prefetch_sse = true;
1693 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1694 error ("CPU you selected does not support x86-64 "
1700 error ("bad value (%s) for -march= switch", ix86_arch_string);
1702 for (i = 0; i < pta_size; i++)
1703 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1705 ix86_tune = processor_alias_table[i].processor;
1706 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1708 if (ix86_tune_defaulted)
1710 ix86_tune_string = "x86-64";
1711 for (i = 0; i < pta_size; i++)
1712 if (! strcmp (ix86_tune_string,
1713 processor_alias_table[i].name))
1715 ix86_tune = processor_alias_table[i].processor;
1718 error ("CPU you selected does not support x86-64 "
1721 /* Intel CPUs have always interpreted SSE prefetch instructions as
1722 NOPs; so, we can enable SSE prefetch instructions even when
1723 -mtune (rather than -march) points us to a processor that has them.
1724 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1725 higher processors. */
1726 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1727 x86_prefetch_sse = true;
1731 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1734 ix86_cost = &size_cost;
1736 ix86_cost = processor_target_table[ix86_tune].cost;
1737 target_flags |= processor_target_table[ix86_tune].target_enable;
1738 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1740 /* Arrange to set up i386_stack_locals for all functions. */
1741 init_machine_status = ix86_init_machine_status;
1743 /* Validate -mregparm= value. */
1744 if (ix86_regparm_string)
1746 i = atoi (ix86_regparm_string);
1747 if (i < 0 || i > REGPARM_MAX)
1748 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1754 ix86_regparm = REGPARM_MAX;
1756 /* If the user has provided any of the -malign-* options,
1757 warn and use that value only if -falign-* is not set.
1758 Remove this code in GCC 3.2 or later. */
1759 if (ix86_align_loops_string)
1761 warning (0, "-malign-loops is obsolete, use -falign-loops");
1762 if (align_loops == 0)
1764 i = atoi (ix86_align_loops_string);
1765 if (i < 0 || i > MAX_CODE_ALIGN)
1766 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1768 align_loops = 1 << i;
1772 if (ix86_align_jumps_string)
1774 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1775 if (align_jumps == 0)
1777 i = atoi (ix86_align_jumps_string);
1778 if (i < 0 || i > MAX_CODE_ALIGN)
1779 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1781 align_jumps = 1 << i;
1785 if (ix86_align_funcs_string)
1787 warning (0, "-malign-functions is obsolete, use -falign-functions");
1788 if (align_functions == 0)
1790 i = atoi (ix86_align_funcs_string);
1791 if (i < 0 || i > MAX_CODE_ALIGN)
1792 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1794 align_functions = 1 << i;
1798 /* Default align_* from the processor table. */
1799 if (align_loops == 0)
1801 align_loops = processor_target_table[ix86_tune].align_loop;
1802 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1804 if (align_jumps == 0)
1806 align_jumps = processor_target_table[ix86_tune].align_jump;
1807 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1809 if (align_functions == 0)
1811 align_functions = processor_target_table[ix86_tune].align_func;
1814 /* Validate -mbranch-cost= value, or provide default. */
1815 ix86_branch_cost = ix86_cost->branch_cost;
1816 if (ix86_branch_cost_string)
1818 i = atoi (ix86_branch_cost_string);
1820 error ("-mbranch-cost=%d is not between 0 and 5", i);
1822 ix86_branch_cost = i;
1824 if (ix86_section_threshold_string)
1826 i = atoi (ix86_section_threshold_string);
1828 error ("-mlarge-data-threshold=%d is negative", i);
1830 ix86_section_threshold = i;
1833 if (ix86_tls_dialect_string)
1835 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1836 ix86_tls_dialect = TLS_DIALECT_GNU;
1837 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1838 ix86_tls_dialect = TLS_DIALECT_GNU2;
1839 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1840 ix86_tls_dialect = TLS_DIALECT_SUN;
1842 error ("bad value (%s) for -mtls-dialect= switch",
1843 ix86_tls_dialect_string);
1846 /* Keep nonleaf frame pointers. */
1847 if (flag_omit_frame_pointer)
1848 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1849 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1850 flag_omit_frame_pointer = 1;
1852 /* If we're doing fast math, we don't care about comparison order
1853 wrt NaNs. This lets us use a shorter comparison sequence. */
1854 if (flag_finite_math_only)
1855 target_flags &= ~MASK_IEEE_FP;
1857 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1858 since the insns won't need emulation. */
1859 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1860 target_flags &= ~MASK_NO_FANCY_MATH_387;
1862 /* Likewise, if the target doesn't have a 387, or we've specified
1863 software floating point, don't use 387 inline intrinsics. */
1865 target_flags |= MASK_NO_FANCY_MATH_387;
1867 /* Turn on SSE3 builtins for -mssse3. */
1869 target_flags |= MASK_SSE3;
1871 /* Turn on SSE2 builtins for -msse3. */
1873 target_flags |= MASK_SSE2;
1875 /* Turn on SSE builtins for -msse2. */
1877 target_flags |= MASK_SSE;
1879 /* Turn on MMX builtins for -msse. */
1882 target_flags |= MASK_MMX & ~target_flags_explicit;
1883 x86_prefetch_sse = true;
1886 /* Turn on MMX builtins for 3Dnow. */
1888 target_flags |= MASK_MMX;
1892 if (TARGET_ALIGN_DOUBLE)
1893 error ("-malign-double makes no sense in the 64bit mode");
1895 error ("-mrtd calling convention not supported in the 64bit mode");
1897 /* Enable by default the SSE and MMX builtins. Do allow the user to
1898 explicitly disable any of these. In particular, disabling SSE and
1899 MMX for kernel code is extremely useful. */
1901 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1902 & ~target_flags_explicit);
1906 /* i386 ABI does not specify red zone. It still makes sense to use it
1907 when programmer takes care to stack from being destroyed. */
1908 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1909 target_flags |= MASK_NO_RED_ZONE;
1912 /* Validate -mpreferred-stack-boundary= value, or provide default.
1913 The default of 128 bits is for Pentium III's SSE __m128. We can't
1914 change it because of optimize_size. Otherwise, we can't mix object
1915 files compiled with -Os and -On. */
1916 ix86_preferred_stack_boundary = 128;
1917 if (ix86_preferred_stack_boundary_string)
1919 i = atoi (ix86_preferred_stack_boundary_string);
1920 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1921 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1922 TARGET_64BIT ? 4 : 2);
1924 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1927 /* Accept -msseregparm only if at least SSE support is enabled. */
1928 if (TARGET_SSEREGPARM
1930 error ("-msseregparm used without SSE enabled");
1932 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1934 if (ix86_fpmath_string != 0)
1936 if (! strcmp (ix86_fpmath_string, "387"))
1937 ix86_fpmath = FPMATH_387;
1938 else if (! strcmp (ix86_fpmath_string, "sse"))
1942 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1943 ix86_fpmath = FPMATH_387;
1946 ix86_fpmath = FPMATH_SSE;
1948 else if (! strcmp (ix86_fpmath_string, "387,sse")
1949 || ! strcmp (ix86_fpmath_string, "sse,387"))
1953 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1954 ix86_fpmath = FPMATH_387;
1956 else if (!TARGET_80387)
1958 warning (0, "387 instruction set disabled, using SSE arithmetics");
1959 ix86_fpmath = FPMATH_SSE;
1962 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1965 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1968 /* If the i387 is disabled, then do not return values in it. */
1970 target_flags &= ~MASK_FLOAT_RETURNS;
1972 if ((x86_accumulate_outgoing_args & TUNEMASK)
1973 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1975 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1977 /* ??? Unwind info is not correct around the CFG unless either a frame
1978 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1979 unwind info generation to be aware of the CFG and propagating states
1981 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1982 || flag_exceptions || flag_non_call_exceptions)
1983 && flag_omit_frame_pointer
1984 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1986 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1987 warning (0, "unwind tables currently require either a frame pointer "
1988 "or -maccumulate-outgoing-args for correctness");
1989 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1992 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1995 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1996 p = strchr (internal_label_prefix, 'X');
1997 internal_label_prefix_len = p - internal_label_prefix;
2001 /* When scheduling description is not available, disable scheduler pass
2002 so it won't slow down the compilation and make x87 code slower. */
2003 if (!TARGET_SCHEDULE)
2004 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2007 /* switch to the appropriate section for output of DECL.
2008 DECL is either a `VAR_DECL' node or a constant of some sort.
2009 RELOC indicates whether forming the initial value of DECL requires
2010 link-time relocations. */
2013 x86_64_elf_select_section (tree decl, int reloc,
2014 unsigned HOST_WIDE_INT align)
2016 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2017 && ix86_in_large_data_p (decl))
2019 const char *sname = NULL;
2020 unsigned int flags = SECTION_WRITE;
2021 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2026 case SECCAT_DATA_REL:
2027 sname = ".ldata.rel";
2029 case SECCAT_DATA_REL_LOCAL:
2030 sname = ".ldata.rel.local";
2032 case SECCAT_DATA_REL_RO:
2033 sname = ".ldata.rel.ro";
2035 case SECCAT_DATA_REL_RO_LOCAL:
2036 sname = ".ldata.rel.ro.local";
2040 flags |= SECTION_BSS;
2043 case SECCAT_RODATA_MERGE_STR:
2044 case SECCAT_RODATA_MERGE_STR_INIT:
2045 case SECCAT_RODATA_MERGE_CONST:
2049 case SECCAT_SRODATA:
2056 /* We don't split these for medium model. Place them into
2057 default sections and hope for best. */
2062 /* We might get called with string constants, but get_named_section
2063 doesn't like them as they are not DECLs. Also, we need to set
2064 flags in that case. */
2066 return get_section (sname, flags, NULL);
2067 return get_named_section (decl, sname, reloc);
2070 return default_elf_select_section (decl, reloc, align);
2073 /* Build up a unique section name, expressed as a
2074 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2075 RELOC indicates whether the initial value of EXP requires
2076 link-time relocations. */
2079 x86_64_elf_unique_section (tree decl, int reloc)
2081 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2082 && ix86_in_large_data_p (decl))
2084 const char *prefix = NULL;
2085 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2086 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2088 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2091 case SECCAT_DATA_REL:
2092 case SECCAT_DATA_REL_LOCAL:
2093 case SECCAT_DATA_REL_RO:
2094 case SECCAT_DATA_REL_RO_LOCAL:
2095 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2098 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2101 case SECCAT_RODATA_MERGE_STR:
2102 case SECCAT_RODATA_MERGE_STR_INIT:
2103 case SECCAT_RODATA_MERGE_CONST:
2104 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2106 case SECCAT_SRODATA:
2113 /* We don't split these for medium model. Place them into
2114 default sections and hope for best. */
2122 plen = strlen (prefix);
2124 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2125 name = targetm.strip_name_encoding (name);
2126 nlen = strlen (name);
2128 string = alloca (nlen + plen + 1);
2129 memcpy (string, prefix, plen);
2130 memcpy (string + plen, name, nlen + 1);
2132 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2136 default_unique_section (decl, reloc);
2139 #ifdef COMMON_ASM_OP
2140 /* This says how to output assembler code to declare an
2141 uninitialized external linkage data object.
2143 For medium model x86-64 we need to use .largecomm opcode for
2146 x86_elf_aligned_common (FILE *file,
2147 const char *name, unsigned HOST_WIDE_INT size,
2150 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2151 && size > (unsigned int)ix86_section_threshold)
2152 fprintf (file, ".largecomm\t");
2154 fprintf (file, "%s", COMMON_ASM_OP);
2155 assemble_name (file, name);
2156 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2157 size, align / BITS_PER_UNIT);
2160 /* Utility function for targets to use in implementing
2161 ASM_OUTPUT_ALIGNED_BSS. */
2164 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2165 const char *name, unsigned HOST_WIDE_INT size,
2168 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2169 && size > (unsigned int)ix86_section_threshold)
2170 switch_to_section (get_named_section (decl, ".lbss", 0));
2172 switch_to_section (bss_section);
2173 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2174 #ifdef ASM_DECLARE_OBJECT_NAME
2175 last_assemble_variable_decl = decl;
2176 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2178 /* Standard thing is just output label for the object. */
2179 ASM_OUTPUT_LABEL (file, name);
2180 #endif /* ASM_DECLARE_OBJECT_NAME */
2181 ASM_OUTPUT_SKIP (file, size ? size : 1);
2186 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2188 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2189 make the problem with not enough registers even worse. */
2190 #ifdef INSN_SCHEDULING
2192 flag_schedule_insns = 0;
2196 /* The Darwin libraries never set errno, so we might as well
2197 avoid calling them when that's the only reason we would. */
2198 flag_errno_math = 0;
2200 /* The default values of these switches depend on the TARGET_64BIT
2201 that is not known at this moment. Mark these values with 2 and
2202 let user the to override these. In case there is no command line option
2203 specifying them, we will set the defaults in override_options. */
2205 flag_omit_frame_pointer = 2;
2206 flag_pcc_struct_return = 2;
2207 flag_asynchronous_unwind_tables = 2;
2208 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2209 SUBTARGET_OPTIMIZATION_OPTIONS;
2213 /* Table of valid machine attributes. */
2214 const struct attribute_spec ix86_attribute_table[] =
2216 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2217 /* Stdcall attribute says callee is responsible for popping arguments
2218 if they are not variable. */
2219 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2220 /* Fastcall attribute says callee is responsible for popping arguments
2221 if they are not variable. */
2222 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2223 /* Cdecl attribute says the callee is a normal C declaration */
2224 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2225 /* Regparm attribute specifies how many integer arguments are to be
2226 passed in registers. */
2227 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2228 /* Sseregparm attribute says we are using x86_64 calling conventions
2229 for FP arguments. */
2230 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2231 /* force_align_arg_pointer says this function realigns the stack at entry. */
2232 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2233 false, true, true, ix86_handle_cconv_attribute },
2234 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2235 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2236 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2237 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2239 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2240 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2241 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2242 SUBTARGET_ATTRIBUTE_TABLE,
2244 { NULL, 0, 0, false, false, false, NULL }
2247 /* Decide whether we can make a sibling call to a function. DECL is the
2248 declaration of the function being targeted by the call and EXP is the
2249 CALL_EXPR representing the call. */
2252 ix86_function_ok_for_sibcall (tree decl, tree exp)
2257 /* If we are generating position-independent code, we cannot sibcall
2258 optimize any indirect call, or a direct call to a global function,
2259 as the PLT requires %ebx be live. */
2260 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2267 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2268 if (POINTER_TYPE_P (func))
2269 func = TREE_TYPE (func);
2272 /* Check that the return value locations are the same. Like
2273 if we are returning floats on the 80387 register stack, we cannot
2274 make a sibcall from a function that doesn't return a float to a
2275 function that does or, conversely, from a function that does return
2276 a float to a function that doesn't; the necessary stack adjustment
2277 would not be executed. This is also the place we notice
2278 differences in the return value ABI. Note that it is ok for one
2279 of the functions to have void return type as long as the return
2280 value of the other is passed in a register. */
2281 a = ix86_function_value (TREE_TYPE (exp), func, false);
2282 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2284 if (STACK_REG_P (a) || STACK_REG_P (b))
2286 if (!rtx_equal_p (a, b))
2289 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2291 else if (!rtx_equal_p (a, b))
2294 /* If this call is indirect, we'll need to be able to use a call-clobbered
2295 register for the address of the target function. Make sure that all
2296 such registers are not used for passing parameters. */
2297 if (!decl && !TARGET_64BIT)
2301 /* We're looking at the CALL_EXPR, we need the type of the function. */
2302 type = TREE_OPERAND (exp, 0); /* pointer expression */
2303 type = TREE_TYPE (type); /* pointer type */
2304 type = TREE_TYPE (type); /* function type */
2306 if (ix86_function_regparm (type, NULL) >= 3)
2308 /* ??? Need to count the actual number of registers to be used,
2309 not the possible number of registers. Fix later. */
2314 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2315 /* Dllimport'd functions are also called indirectly. */
2316 if (decl && DECL_DLLIMPORT_P (decl)
2317 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2321 /* If we forced aligned the stack, then sibcalling would unalign the
2322 stack, which may break the called function. */
2323 if (cfun->machine->force_align_arg_pointer)
2326 /* Otherwise okay. That also includes certain types of indirect calls. */
2330 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2331 calling convention attributes;
2332 arguments as in struct attribute_spec.handler. */
2335 ix86_handle_cconv_attribute (tree *node, tree name,
2337 int flags ATTRIBUTE_UNUSED,
2340 if (TREE_CODE (*node) != FUNCTION_TYPE
2341 && TREE_CODE (*node) != METHOD_TYPE
2342 && TREE_CODE (*node) != FIELD_DECL
2343 && TREE_CODE (*node) != TYPE_DECL)
2345 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2346 IDENTIFIER_POINTER (name));
2347 *no_add_attrs = true;
2351 /* Can combine regparm with all attributes but fastcall. */
2352 if (is_attribute_p ("regparm", name))
2356 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2358 error ("fastcall and regparm attributes are not compatible");
2361 cst = TREE_VALUE (args);
2362 if (TREE_CODE (cst) != INTEGER_CST)
2364 warning (OPT_Wattributes,
2365 "%qs attribute requires an integer constant argument",
2366 IDENTIFIER_POINTER (name));
2367 *no_add_attrs = true;
2369 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2371 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2372 IDENTIFIER_POINTER (name), REGPARM_MAX);
2373 *no_add_attrs = true;
2377 && lookup_attribute (ix86_force_align_arg_pointer_string,
2378 TYPE_ATTRIBUTES (*node))
2379 && compare_tree_int (cst, REGPARM_MAX-1))
2381 error ("%s functions limited to %d register parameters",
2382 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2390 warning (OPT_Wattributes, "%qs attribute ignored",
2391 IDENTIFIER_POINTER (name));
2392 *no_add_attrs = true;
2396 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2397 if (is_attribute_p ("fastcall", name))
2399 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2401 error ("fastcall and cdecl attributes are not compatible");
2403 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2405 error ("fastcall and stdcall attributes are not compatible");
2407 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2409 error ("fastcall and regparm attributes are not compatible");
2413 /* Can combine stdcall with fastcall (redundant), regparm and
2415 else if (is_attribute_p ("stdcall", name))
2417 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2419 error ("stdcall and cdecl attributes are not compatible");
2421 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2423 error ("stdcall and fastcall attributes are not compatible");
2427 /* Can combine cdecl with regparm and sseregparm. */
2428 else if (is_attribute_p ("cdecl", name))
2430 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2432 error ("stdcall and cdecl attributes are not compatible");
2434 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2436 error ("fastcall and cdecl attributes are not compatible");
2440 /* Can combine sseregparm with all attributes. */
2445 /* Return 0 if the attributes for two types are incompatible, 1 if they
2446 are compatible, and 2 if they are nearly compatible (which causes a
2447 warning to be generated). */
2450 ix86_comp_type_attributes (tree type1, tree type2)
2452 /* Check for mismatch of non-default calling convention. */
2453 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2455 if (TREE_CODE (type1) != FUNCTION_TYPE)
2458 /* Check for mismatched fastcall/regparm types. */
2459 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2460 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2461 || (ix86_function_regparm (type1, NULL)
2462 != ix86_function_regparm (type2, NULL)))
2465 /* Check for mismatched sseregparm types. */
2466 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2467 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2470 /* Check for mismatched return types (cdecl vs stdcall). */
2471 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2472 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2478 /* Return the regparm value for a function with the indicated TYPE and DECL.
2479 DECL may be NULL when calling function indirectly
2480 or considering a libcall. */
2483 ix86_function_regparm (tree type, tree decl)
2486 int regparm = ix86_regparm;
2487 bool user_convention = false;
2491 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2494 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2495 user_convention = true;
2498 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2501 user_convention = true;
2504 /* Use register calling convention for local functions when possible. */
2505 if (!TARGET_64BIT && !user_convention && decl
2506 && flag_unit_at_a_time && !profile_flag)
2508 struct cgraph_local_info *i = cgraph_local_info (decl);
2511 int local_regparm, globals = 0, regno;
2513 /* Make sure no regparm register is taken by a global register
2515 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2516 if (global_regs[local_regparm])
2518 /* We can't use regparm(3) for nested functions as these use
2519 static chain pointer in third argument. */
2520 if (local_regparm == 3
2521 && decl_function_context (decl)
2522 && !DECL_NO_STATIC_CHAIN (decl))
2524 /* If the function realigns its stackpointer, the
2525 prologue will clobber %ecx. If we've already
2526 generated code for the callee, the callee
2527 DECL_STRUCT_FUNCTION is gone, so we fall back to
2528 scanning the attributes for the self-realigning
2530 if ((DECL_STRUCT_FUNCTION (decl)
2531 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2532 || (!DECL_STRUCT_FUNCTION (decl)
2533 && lookup_attribute (ix86_force_align_arg_pointer_string,
2534 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2536 /* Each global register variable increases register preassure,
2537 so the more global reg vars there are, the smaller regparm
2538 optimization use, unless requested by the user explicitly. */
2539 for (regno = 0; regno < 6; regno++)
2540 if (global_regs[regno])
2543 = globals < local_regparm ? local_regparm - globals : 0;
2545 if (local_regparm > regparm)
2546 regparm = local_regparm;
2553 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2554 in SSE registers for a function with the indicated TYPE and DECL.
2555 DECL may be NULL when calling function indirectly
2556 or considering a libcall. Otherwise return 0. */
2559 ix86_function_sseregparm (tree type, tree decl)
2561 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2562 by the sseregparm attribute. */
2563 if (TARGET_SSEREGPARM
2565 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2570 error ("Calling %qD with attribute sseregparm without "
2571 "SSE/SSE2 enabled", decl);
2573 error ("Calling %qT with attribute sseregparm without "
2574 "SSE/SSE2 enabled", type);
2581 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2582 in SSE registers even for 32-bit mode and not just 3, but up to
2583 8 SSE arguments in registers. */
2584 if (!TARGET_64BIT && decl
2585 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2587 struct cgraph_local_info *i = cgraph_local_info (decl);
2589 return TARGET_SSE2 ? 2 : 1;
2595 /* Return true if EAX is live at the start of the function. Used by
2596 ix86_expand_prologue to determine if we need special help before
2597 calling allocate_stack_worker. */
2600 ix86_eax_live_at_start_p (void)
2602 /* Cheat. Don't bother working forward from ix86_function_regparm
2603 to the function type to whether an actual argument is located in
2604 eax. Instead just look at cfg info, which is still close enough
2605 to correct at this point. This gives false positives for broken
2606 functions that might use uninitialized data that happens to be
2607 allocated in eax, but who cares? */
2608 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2611 /* Value is the number of bytes of arguments automatically
2612 popped when returning from a subroutine call.
2613 FUNDECL is the declaration node of the function (as a tree),
2614 FUNTYPE is the data type of the function (as a tree),
2615 or for a library call it is an identifier node for the subroutine name.
2616 SIZE is the number of bytes of arguments passed on the stack.
2618 On the 80386, the RTD insn may be used to pop them if the number
2619 of args is fixed, but if the number is variable then the caller
2620 must pop them all. RTD can't be used for library calls now
2621 because the library is compiled with the Unix compiler.
2622 Use of RTD is a selectable option, since it is incompatible with
2623 standard Unix calling sequences. If the option is not selected,
2624 the caller must always pop the args.
2626 The attribute stdcall is equivalent to RTD on a per module basis. */
2629 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2631 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2633 /* Cdecl functions override -mrtd, and never pop the stack. */
2634 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2636 /* Stdcall and fastcall functions will pop the stack if not
2638 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2639 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2643 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2644 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2645 == void_type_node)))
2649 /* Lose any fake structure return argument if it is passed on the stack. */
2650 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2652 && !KEEP_AGGREGATE_RETURN_POINTER)
2654 int nregs = ix86_function_regparm (funtype, fundecl);
2657 return GET_MODE_SIZE (Pmode);
2663 /* Argument support functions. */
2665 /* Return true when register may be used to pass function parameters. */
2667 ix86_function_arg_regno_p (int regno)
2671 return (regno < REGPARM_MAX
2672 || (TARGET_MMX && MMX_REGNO_P (regno)
2673 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2674 || (TARGET_SSE && SSE_REGNO_P (regno)
2675 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2677 if (TARGET_SSE && SSE_REGNO_P (regno)
2678 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2680 /* RAX is used as hidden argument to va_arg functions. */
2683 for (i = 0; i < REGPARM_MAX; i++)
2684 if (regno == x86_64_int_parameter_registers[i])
2689 /* Return if we do not know how to pass TYPE solely in registers. */
2692 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2694 if (must_pass_in_stack_var_size_or_pad (mode, type))
2697 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2698 The layout_type routine is crafty and tries to trick us into passing
2699 currently unsupported vector types on the stack by using TImode. */
2700 return (!TARGET_64BIT && mode == TImode
2701 && type && TREE_CODE (type) != VECTOR_TYPE);
2704 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2705 for a call to a function whose data type is FNTYPE.
2706 For a library call, FNTYPE is 0. */
2709 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2710 tree fntype, /* tree ptr for function decl */
2711 rtx libname, /* SYMBOL_REF of library name or 0 */
2714 static CUMULATIVE_ARGS zero_cum;
2715 tree param, next_param;
2717 if (TARGET_DEBUG_ARG)
2719 fprintf (stderr, "\ninit_cumulative_args (");
2721 fprintf (stderr, "fntype code = %s, ret code = %s",
2722 tree_code_name[(int) TREE_CODE (fntype)],
2723 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2725 fprintf (stderr, "no fntype");
2728 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2733 /* Set up the number of registers to use for passing arguments. */
2734 cum->nregs = ix86_regparm;
2736 cum->sse_nregs = SSE_REGPARM_MAX;
2738 cum->mmx_nregs = MMX_REGPARM_MAX;
2739 cum->warn_sse = true;
2740 cum->warn_mmx = true;
2741 cum->maybe_vaarg = false;
2743 /* Use ecx and edx registers if function has fastcall attribute,
2744 else look for regparm information. */
2745 if (fntype && !TARGET_64BIT)
2747 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2753 cum->nregs = ix86_function_regparm (fntype, fndecl);
2756 /* Set up the number of SSE registers used for passing SFmode
2757 and DFmode arguments. Warn for mismatching ABI. */
2758 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2760 /* Determine if this function has variable arguments. This is
2761 indicated by the last argument being 'void_type_mode' if there
2762 are no variable arguments. If there are variable arguments, then
2763 we won't pass anything in registers in 32-bit mode. */
2765 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2767 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2768 param != 0; param = next_param)
2770 next_param = TREE_CHAIN (param);
2771 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2781 cum->float_in_sse = 0;
2783 cum->maybe_vaarg = true;
2787 if ((!fntype && !libname)
2788 || (fntype && !TYPE_ARG_TYPES (fntype)))
2789 cum->maybe_vaarg = true;
2791 if (TARGET_DEBUG_ARG)
2792 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2797 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2798 But in the case of vector types, it is some vector mode.
2800 When we have only some of our vector isa extensions enabled, then there
2801 are some modes for which vector_mode_supported_p is false. For these
2802 modes, the generic vector support in gcc will choose some non-vector mode
2803 in order to implement the type. By computing the natural mode, we'll
2804 select the proper ABI location for the operand and not depend on whatever
2805 the middle-end decides to do with these vector types. */
2807 static enum machine_mode
2808 type_natural_mode (tree type)
2810 enum machine_mode mode = TYPE_MODE (type);
2812 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2814 HOST_WIDE_INT size = int_size_in_bytes (type);
2815 if ((size == 8 || size == 16)
2816 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2817 && TYPE_VECTOR_SUBPARTS (type) > 1)
2819 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2821 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2822 mode = MIN_MODE_VECTOR_FLOAT;
2824 mode = MIN_MODE_VECTOR_INT;
2826 /* Get the mode which has this inner mode and number of units. */
2827 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2828 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2829 && GET_MODE_INNER (mode) == innermode)
2839 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2840 this may not agree with the mode that the type system has chosen for the
2841 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2842 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2845 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2850 if (orig_mode != BLKmode)
2851 tmp = gen_rtx_REG (orig_mode, regno);
2854 tmp = gen_rtx_REG (mode, regno);
2855 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2856 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2862 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2863 of this code is to classify each 8bytes of incoming argument by the register
2864 class and assign registers accordingly. */
2866 /* Return the union class of CLASS1 and CLASS2.
2867 See the x86-64 PS ABI for details. */
2869 static enum x86_64_reg_class
2870 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2872 /* Rule #1: If both classes are equal, this is the resulting class. */
2873 if (class1 == class2)
2876 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2878 if (class1 == X86_64_NO_CLASS)
2880 if (class2 == X86_64_NO_CLASS)
2883 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2884 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2885 return X86_64_MEMORY_CLASS;
2887 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2888 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2889 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2890 return X86_64_INTEGERSI_CLASS;
2891 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2892 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2893 return X86_64_INTEGER_CLASS;
2895 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2897 if (class1 == X86_64_X87_CLASS
2898 || class1 == X86_64_X87UP_CLASS
2899 || class1 == X86_64_COMPLEX_X87_CLASS
2900 || class2 == X86_64_X87_CLASS
2901 || class2 == X86_64_X87UP_CLASS
2902 || class2 == X86_64_COMPLEX_X87_CLASS)
2903 return X86_64_MEMORY_CLASS;
2905 /* Rule #6: Otherwise class SSE is used. */
2906 return X86_64_SSE_CLASS;
2909 /* Classify the argument of type TYPE and mode MODE.
2910 CLASSES will be filled by the register class used to pass each word
2911 of the operand. The number of words is returned. In case the parameter
2912 should be passed in memory, 0 is returned. As a special case for zero
2913 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2915 BIT_OFFSET is used internally for handling records and specifies offset
2916 of the offset in bits modulo 256 to avoid overflow cases.
2918 See the x86-64 PS ABI for details.
2922 classify_argument (enum machine_mode mode, tree type,
2923 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2925 HOST_WIDE_INT bytes =
2926 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2927 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2929 /* Variable sized entities are always passed/returned in memory. */
2933 if (mode != VOIDmode
2934 && targetm.calls.must_pass_in_stack (mode, type))
2937 if (type && AGGREGATE_TYPE_P (type))
2941 enum x86_64_reg_class subclasses[MAX_CLASSES];
2943 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2947 for (i = 0; i < words; i++)
2948 classes[i] = X86_64_NO_CLASS;
2950 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2951 signalize memory class, so handle it as special case. */
2954 classes[0] = X86_64_NO_CLASS;
2958 /* Classify each field of record and merge classes. */
2959 switch (TREE_CODE (type))
2962 /* And now merge the fields of structure. */
2963 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2965 if (TREE_CODE (field) == FIELD_DECL)
2969 if (TREE_TYPE (field) == error_mark_node)
2972 /* Bitfields are always classified as integer. Handle them
2973 early, since later code would consider them to be
2974 misaligned integers. */
2975 if (DECL_BIT_FIELD (field))
2977 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2978 i < ((int_bit_position (field) + (bit_offset % 64))
2979 + tree_low_cst (DECL_SIZE (field), 0)
2982 merge_classes (X86_64_INTEGER_CLASS,
2987 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2988 TREE_TYPE (field), subclasses,
2989 (int_bit_position (field)
2990 + bit_offset) % 256);
2993 for (i = 0; i < num; i++)
2996 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2998 merge_classes (subclasses[i], classes[i + pos]);
3006 /* Arrays are handled as small records. */
3009 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3010 TREE_TYPE (type), subclasses, bit_offset);
3014 /* The partial classes are now full classes. */
3015 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3016 subclasses[0] = X86_64_SSE_CLASS;
3017 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3018 subclasses[0] = X86_64_INTEGER_CLASS;
3020 for (i = 0; i < words; i++)
3021 classes[i] = subclasses[i % num];
3026 case QUAL_UNION_TYPE:
3027 /* Unions are similar to RECORD_TYPE but offset is always 0.
3029 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3031 if (TREE_CODE (field) == FIELD_DECL)
3035 if (TREE_TYPE (field) == error_mark_node)
3038 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3039 TREE_TYPE (field), subclasses,
3043 for (i = 0; i < num; i++)
3044 classes[i] = merge_classes (subclasses[i], classes[i]);
3053 /* Final merger cleanup. */
3054 for (i = 0; i < words; i++)
3056 /* If one class is MEMORY, everything should be passed in
3058 if (classes[i] == X86_64_MEMORY_CLASS)
3061 /* The X86_64_SSEUP_CLASS should be always preceded by
3062 X86_64_SSE_CLASS. */
3063 if (classes[i] == X86_64_SSEUP_CLASS
3064 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3065 classes[i] = X86_64_SSE_CLASS;
3067 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3068 if (classes[i] == X86_64_X87UP_CLASS
3069 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3070 classes[i] = X86_64_SSE_CLASS;
3075 /* Compute alignment needed. We align all types to natural boundaries with
3076 exception of XFmode that is aligned to 64bits. */
3077 if (mode != VOIDmode && mode != BLKmode)
3079 int mode_alignment = GET_MODE_BITSIZE (mode);
3082 mode_alignment = 128;
3083 else if (mode == XCmode)
3084 mode_alignment = 256;
3085 if (COMPLEX_MODE_P (mode))
3086 mode_alignment /= 2;
3087 /* Misaligned fields are always returned in memory. */
3088 if (bit_offset % mode_alignment)
3092 /* for V1xx modes, just use the base mode */
3093 if (VECTOR_MODE_P (mode)
3094 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3095 mode = GET_MODE_INNER (mode);
3097 /* Classification of atomic types. */
3102 classes[0] = X86_64_SSE_CLASS;
3105 classes[0] = X86_64_SSE_CLASS;
3106 classes[1] = X86_64_SSEUP_CLASS;
3115 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3116 classes[0] = X86_64_INTEGERSI_CLASS;
3118 classes[0] = X86_64_INTEGER_CLASS;
3122 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3127 if (!(bit_offset % 64))
3128 classes[0] = X86_64_SSESF_CLASS;
3130 classes[0] = X86_64_SSE_CLASS;
3133 classes[0] = X86_64_SSEDF_CLASS;
3136 classes[0] = X86_64_X87_CLASS;
3137 classes[1] = X86_64_X87UP_CLASS;
3140 classes[0] = X86_64_SSE_CLASS;
3141 classes[1] = X86_64_SSEUP_CLASS;
3144 classes[0] = X86_64_SSE_CLASS;
3147 classes[0] = X86_64_SSEDF_CLASS;
3148 classes[1] = X86_64_SSEDF_CLASS;
3151 classes[0] = X86_64_COMPLEX_X87_CLASS;
3154 /* This modes is larger than 16 bytes. */
3162 classes[0] = X86_64_SSE_CLASS;
3163 classes[1] = X86_64_SSEUP_CLASS;
3169 classes[0] = X86_64_SSE_CLASS;
3175 gcc_assert (VECTOR_MODE_P (mode));
3180 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3182 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3183 classes[0] = X86_64_INTEGERSI_CLASS;
3185 classes[0] = X86_64_INTEGER_CLASS;
3186 classes[1] = X86_64_INTEGER_CLASS;
3187 return 1 + (bytes > 8);
3191 /* Examine the argument and return set number of register required in each
3192 class. Return 0 iff parameter should be passed in memory. */
3194 examine_argument (enum machine_mode mode, tree type, int in_return,
3195 int *int_nregs, int *sse_nregs)
3197 enum x86_64_reg_class class[MAX_CLASSES];
3198 int n = classify_argument (mode, type, class, 0);
3204 for (n--; n >= 0; n--)
3207 case X86_64_INTEGER_CLASS:
3208 case X86_64_INTEGERSI_CLASS:
3211 case X86_64_SSE_CLASS:
3212 case X86_64_SSESF_CLASS:
3213 case X86_64_SSEDF_CLASS:
3216 case X86_64_NO_CLASS:
3217 case X86_64_SSEUP_CLASS:
3219 case X86_64_X87_CLASS:
3220 case X86_64_X87UP_CLASS:
3224 case X86_64_COMPLEX_X87_CLASS:
3225 return in_return ? 2 : 0;
3226 case X86_64_MEMORY_CLASS:
3232 /* Construct container for the argument used by GCC interface. See
3233 FUNCTION_ARG for the detailed description. */
3236 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3237 tree type, int in_return, int nintregs, int nsseregs,
3238 const int *intreg, int sse_regno)
3240 /* The following variables hold the static issued_error state. */
3241 static bool issued_sse_arg_error;
3242 static bool issued_sse_ret_error;
3243 static bool issued_x87_ret_error;
3245 enum machine_mode tmpmode;
3247 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3248 enum x86_64_reg_class class[MAX_CLASSES];
3252 int needed_sseregs, needed_intregs;
3253 rtx exp[MAX_CLASSES];
3256 n = classify_argument (mode, type, class, 0);
3257 if (TARGET_DEBUG_ARG)
3260 fprintf (stderr, "Memory class\n");
3263 fprintf (stderr, "Classes:");
3264 for (i = 0; i < n; i++)
3266 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3268 fprintf (stderr, "\n");
3273 if (!examine_argument (mode, type, in_return, &needed_intregs,
3276 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3279 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3280 some less clueful developer tries to use floating-point anyway. */
3281 if (needed_sseregs && !TARGET_SSE)
3285 if (!issued_sse_ret_error)
3287 error ("SSE register return with SSE disabled");
3288 issued_sse_ret_error = true;
3291 else if (!issued_sse_arg_error)
3293 error ("SSE register argument with SSE disabled");
3294 issued_sse_arg_error = true;
3299 /* Likewise, error if the ABI requires us to return values in the
3300 x87 registers and the user specified -mno-80387. */
3301 if (!TARGET_80387 && in_return)
3302 for (i = 0; i < n; i++)
3303 if (class[i] == X86_64_X87_CLASS
3304 || class[i] == X86_64_X87UP_CLASS
3305 || class[i] == X86_64_COMPLEX_X87_CLASS)
3307 if (!issued_x87_ret_error)
3309 error ("x87 register return with x87 disabled");
3310 issued_x87_ret_error = true;
3315 /* First construct simple cases. Avoid SCmode, since we want to use
3316 single register to pass this type. */
3317 if (n == 1 && mode != SCmode)
3320 case X86_64_INTEGER_CLASS:
3321 case X86_64_INTEGERSI_CLASS:
3322 return gen_rtx_REG (mode, intreg[0]);
3323 case X86_64_SSE_CLASS:
3324 case X86_64_SSESF_CLASS:
3325 case X86_64_SSEDF_CLASS:
3326 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3327 case X86_64_X87_CLASS:
3328 case X86_64_COMPLEX_X87_CLASS:
3329 return gen_rtx_REG (mode, FIRST_STACK_REG);
3330 case X86_64_NO_CLASS:
3331 /* Zero sized array, struct or class. */
3336 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3338 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3340 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3341 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3342 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3343 && class[1] == X86_64_INTEGER_CLASS
3344 && (mode == CDImode || mode == TImode || mode == TFmode)
3345 && intreg[0] + 1 == intreg[1])
3346 return gen_rtx_REG (mode, intreg[0]);
3348 /* Otherwise figure out the entries of the PARALLEL. */
3349 for (i = 0; i < n; i++)
3353 case X86_64_NO_CLASS:
3355 case X86_64_INTEGER_CLASS:
3356 case X86_64_INTEGERSI_CLASS:
3357 /* Merge TImodes on aligned occasions here too. */
3358 if (i * 8 + 8 > bytes)
3359 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3360 else if (class[i] == X86_64_INTEGERSI_CLASS)
3364 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3365 if (tmpmode == BLKmode)
3367 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3368 gen_rtx_REG (tmpmode, *intreg),
3372 case X86_64_SSESF_CLASS:
3373 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3374 gen_rtx_REG (SFmode,
3375 SSE_REGNO (sse_regno)),
3379 case X86_64_SSEDF_CLASS:
3380 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3381 gen_rtx_REG (DFmode,
3382 SSE_REGNO (sse_regno)),
3386 case X86_64_SSE_CLASS:
3387 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3391 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3392 gen_rtx_REG (tmpmode,
3393 SSE_REGNO (sse_regno)),
3395 if (tmpmode == TImode)
3404 /* Empty aligned struct, union or class. */
3408 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3409 for (i = 0; i < nexps; i++)
3410 XVECEXP (ret, 0, i) = exp [i];
3414 /* Update the data in CUM to advance over an argument
3415 of mode MODE and data type TYPE.
3416 (TYPE is null for libcalls where that information may not be available.) */
3419 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3420 tree type, int named)
3423 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3424 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3427 mode = type_natural_mode (type);
3429 if (TARGET_DEBUG_ARG)
3430 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3431 "mode=%s, named=%d)\n\n",
3432 words, cum->words, cum->nregs, cum->sse_nregs,
3433 GET_MODE_NAME (mode), named);
3437 int int_nregs, sse_nregs;
3438 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3439 cum->words += words;
3440 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3442 cum->nregs -= int_nregs;
3443 cum->sse_nregs -= sse_nregs;
3444 cum->regno += int_nregs;
3445 cum->sse_regno += sse_nregs;
3448 cum->words += words;
3466 cum->words += words;
3467 cum->nregs -= words;
3468 cum->regno += words;
3470 if (cum->nregs <= 0)
3478 if (cum->float_in_sse < 2)
3481 if (cum->float_in_sse < 1)
3492 if (!type || !AGGREGATE_TYPE_P (type))
3494 cum->sse_words += words;
3495 cum->sse_nregs -= 1;
3496 cum->sse_regno += 1;
3497 if (cum->sse_nregs <= 0)
3509 if (!type || !AGGREGATE_TYPE_P (type))
3511 cum->mmx_words += words;
3512 cum->mmx_nregs -= 1;
3513 cum->mmx_regno += 1;
3514 if (cum->mmx_nregs <= 0)
3525 /* Define where to put the arguments to a function.
3526 Value is zero to push the argument on the stack,
3527 or a hard register in which to store the argument.
3529 MODE is the argument's machine mode.
3530 TYPE is the data type of the argument (as a tree).
3531 This is null for libcalls where that information may
3533 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3534 the preceding args and about the function being called.
3535 NAMED is nonzero if this argument is a named parameter
3536 (otherwise it is an extra parameter matching an ellipsis). */
3539 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3540 tree type, int named)
3542 enum machine_mode mode = orig_mode;
3545 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3546 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3547 static bool warnedsse, warnedmmx;
3549 /* To simplify the code below, represent vector types with a vector mode
3550 even if MMX/SSE are not active. */
3551 if (type && TREE_CODE (type) == VECTOR_TYPE)
3552 mode = type_natural_mode (type);
3554 /* Handle a hidden AL argument containing number of registers for varargs
3555 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3557 if (mode == VOIDmode)
3560 return GEN_INT (cum->maybe_vaarg
3561 ? (cum->sse_nregs < 0
3569 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3571 &x86_64_int_parameter_registers [cum->regno],
3576 /* For now, pass fp/complex values on the stack. */
3588 if (words <= cum->nregs)
3590 int regno = cum->regno;
3592 /* Fastcall allocates the first two DWORD (SImode) or
3593 smaller arguments to ECX and EDX. */
3596 if (mode == BLKmode || mode == DImode)
3599 /* ECX not EAX is the first allocated register. */
3603 ret = gen_rtx_REG (mode, regno);
3607 if (cum->float_in_sse < 2)
3610 if (cum->float_in_sse < 1)
3620 if (!type || !AGGREGATE_TYPE_P (type))
3622 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3625 warning (0, "SSE vector argument without SSE enabled "
3629 ret = gen_reg_or_parallel (mode, orig_mode,
3630 cum->sse_regno + FIRST_SSE_REG);
3637 if (!type || !AGGREGATE_TYPE_P (type))
3639 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3642 warning (0, "MMX vector argument without MMX enabled "
3646 ret = gen_reg_or_parallel (mode, orig_mode,
3647 cum->mmx_regno + FIRST_MMX_REG);
3652 if (TARGET_DEBUG_ARG)
3655 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3656 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3659 print_simple_rtl (stderr, ret);
3661 fprintf (stderr, ", stack");
3663 fprintf (stderr, " )\n");
3669 /* A C expression that indicates when an argument must be passed by
3670 reference. If nonzero for an argument, a copy of that argument is
3671 made in memory and a pointer to the argument is passed instead of
3672 the argument itself. The pointer is passed in whatever way is
3673 appropriate for passing a pointer to that type. */
3676 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3677 enum machine_mode mode ATTRIBUTE_UNUSED,
3678 tree type, bool named ATTRIBUTE_UNUSED)
3683 if (type && int_size_in_bytes (type) == -1)
3685 if (TARGET_DEBUG_ARG)
3686 fprintf (stderr, "function_arg_pass_by_reference\n");
3693 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3694 ABI. Only called if TARGET_SSE. */
3696 contains_128bit_aligned_vector_p (tree type)
3698 enum machine_mode mode = TYPE_MODE (type);
3699 if (SSE_REG_MODE_P (mode)
3700 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3702 if (TYPE_ALIGN (type) < 128)
3705 if (AGGREGATE_TYPE_P (type))
3707 /* Walk the aggregates recursively. */
3708 switch (TREE_CODE (type))
3712 case QUAL_UNION_TYPE:
3716 /* Walk all the structure fields. */
3717 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3719 if (TREE_CODE (field) == FIELD_DECL
3720 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3727 /* Just for use if some languages passes arrays by value. */
3728 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3739 /* Gives the alignment boundary, in bits, of an argument with the
3740 specified mode and type. */
3743 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3747 align = TYPE_ALIGN (type);
3749 align = GET_MODE_ALIGNMENT (mode);
3750 if (align < PARM_BOUNDARY)
3751 align = PARM_BOUNDARY;
3754 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3755 make an exception for SSE modes since these require 128bit
3758 The handling here differs from field_alignment. ICC aligns MMX
3759 arguments to 4 byte boundaries, while structure fields are aligned
3760 to 8 byte boundaries. */
3762 align = PARM_BOUNDARY;
3765 if (!SSE_REG_MODE_P (mode))
3766 align = PARM_BOUNDARY;
3770 if (!contains_128bit_aligned_vector_p (type))
3771 align = PARM_BOUNDARY;
3779 /* Return true if N is a possible register number of function value. */
3781 ix86_function_value_regno_p (int regno)
3784 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3785 || (regno == FIRST_SSE_REG && TARGET_SSE))
3789 && (regno == FIRST_MMX_REG && TARGET_MMX))
3795 /* Define how to find the value returned by a function.
3796 VALTYPE is the data type of the value (as a tree).
3797 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3798 otherwise, FUNC is 0. */
3800 ix86_function_value (tree valtype, tree fntype_or_decl,
3801 bool outgoing ATTRIBUTE_UNUSED)
3803 enum machine_mode natmode = type_natural_mode (valtype);
3807 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3808 1, REGPARM_MAX, SSE_REGPARM_MAX,
3809 x86_64_int_return_registers, 0);
3810 /* For zero sized structures, construct_container return NULL, but we
3811 need to keep rest of compiler happy by returning meaningful value. */
3813 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3818 tree fn = NULL_TREE, fntype;
3820 && DECL_P (fntype_or_decl))
3821 fn = fntype_or_decl;
3822 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3823 return gen_rtx_REG (TYPE_MODE (valtype),
3824 ix86_value_regno (natmode, fn, fntype));
3828 /* Return true iff type is returned in memory. */
3830 ix86_return_in_memory (tree type)
3832 int needed_intregs, needed_sseregs, size;
3833 enum machine_mode mode = type_natural_mode (type);
3836 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3838 if (mode == BLKmode)
3841 size = int_size_in_bytes (type);
3843 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3846 if (VECTOR_MODE_P (mode) || mode == TImode)
3848 /* User-created vectors small enough to fit in EAX. */
3852 /* MMX/3dNow values are returned in MM0,
3853 except when it doesn't exits. */
3855 return (TARGET_MMX ? 0 : 1);
3857 /* SSE values are returned in XMM0, except when it doesn't exist. */
3859 return (TARGET_SSE ? 0 : 1);
3873 /* When returning SSE vector types, we have a choice of either
3874 (1) being abi incompatible with a -march switch, or
3875 (2) generating an error.
3876 Given no good solution, I think the safest thing is one warning.
3877 The user won't be able to use -Werror, but....
3879 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3880 called in response to actually generating a caller or callee that
3881 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3882 via aggregate_value_p for general type probing from tree-ssa. */
3885 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3887 static bool warnedsse, warnedmmx;
3891 /* Look at the return type of the function, not the function type. */
3892 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3894 if (!TARGET_SSE && !warnedsse)
3897 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3900 warning (0, "SSE vector return without SSE enabled "
3905 if (!TARGET_MMX && !warnedmmx)
3907 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3910 warning (0, "MMX vector return without MMX enabled "
3919 /* Define how to find the value returned by a library function
3920 assuming the value has mode MODE. */
3922 ix86_libcall_value (enum machine_mode mode)
3936 return gen_rtx_REG (mode, FIRST_SSE_REG);
3939 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3943 return gen_rtx_REG (mode, 0);
3947 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3950 /* Given a mode, return the register to use for a return value. */
3953 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3955 gcc_assert (!TARGET_64BIT);
3957 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3958 we normally prevent this case when mmx is not available. However
3959 some ABIs may require the result to be returned like DImode. */
3960 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3961 return TARGET_MMX ? FIRST_MMX_REG : 0;
3963 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3964 we prevent this case when sse is not available. However some ABIs
3965 may require the result to be returned like integer TImode. */
3966 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3967 return TARGET_SSE ? FIRST_SSE_REG : 0;
3969 /* Decimal floating point values can go in %eax, unlike other float modes. */
3970 if (DECIMAL_FLOAT_MODE_P (mode))
3973 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3974 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
3977 /* Floating point return values in %st(0), except for local functions when
3978 SSE math is enabled or for functions with sseregparm attribute. */
3979 if ((func || fntype)
3980 && (mode == SFmode || mode == DFmode))
3982 int sse_level = ix86_function_sseregparm (fntype, func);
3983 if ((sse_level >= 1 && mode == SFmode)
3984 || (sse_level == 2 && mode == DFmode))
3985 return FIRST_SSE_REG;
3988 return FIRST_FLOAT_REG;
3991 /* Create the va_list data type. */
3994 ix86_build_builtin_va_list (void)
3996 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3998 /* For i386 we use plain pointer to argument area. */
4000 return build_pointer_type (char_type_node);
4002 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4003 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4005 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4006 unsigned_type_node);
4007 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4008 unsigned_type_node);
4009 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4011 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4014 va_list_gpr_counter_field = f_gpr;
4015 va_list_fpr_counter_field = f_fpr;
4017 DECL_FIELD_CONTEXT (f_gpr) = record;
4018 DECL_FIELD_CONTEXT (f_fpr) = record;
4019 DECL_FIELD_CONTEXT (f_ovf) = record;
4020 DECL_FIELD_CONTEXT (f_sav) = record;
4022 TREE_CHAIN (record) = type_decl;
4023 TYPE_NAME (record) = type_decl;
4024 TYPE_FIELDS (record) = f_gpr;
4025 TREE_CHAIN (f_gpr) = f_fpr;
4026 TREE_CHAIN (f_fpr) = f_ovf;
4027 TREE_CHAIN (f_ovf) = f_sav;
4029 layout_type (record);
4031 /* The correct type is an array type of one element. */
4032 return build_array_type (record, build_index_type (size_zero_node));
4035 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4038 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4039 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4042 CUMULATIVE_ARGS next_cum;
4043 rtx save_area = NULL_RTX, mem;
4056 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4059 /* Indicate to allocate space on the stack for varargs save area. */
4060 ix86_save_varrargs_registers = 1;
4062 cfun->stack_alignment_needed = 128;
4064 fntype = TREE_TYPE (current_function_decl);
4065 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4066 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4067 != void_type_node));
4069 /* For varargs, we do not want to skip the dummy va_dcl argument.
4070 For stdargs, we do want to skip the last named argument. */
4073 function_arg_advance (&next_cum, mode, type, 1);
4076 save_area = frame_pointer_rtx;
4078 set = get_varargs_alias_set ();
4080 for (i = next_cum.regno;
4082 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4085 mem = gen_rtx_MEM (Pmode,
4086 plus_constant (save_area, i * UNITS_PER_WORD));
4087 MEM_NOTRAP_P (mem) = 1;
4088 set_mem_alias_set (mem, set);
4089 emit_move_insn (mem, gen_rtx_REG (Pmode,
4090 x86_64_int_parameter_registers[i]));
4093 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4095 /* Now emit code to save SSE registers. The AX parameter contains number
4096 of SSE parameter registers used to call this function. We use
4097 sse_prologue_save insn template that produces computed jump across
4098 SSE saves. We need some preparation work to get this working. */
4100 label = gen_label_rtx ();
4101 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4103 /* Compute address to jump to :
4104 label - 5*eax + nnamed_sse_arguments*5 */
4105 tmp_reg = gen_reg_rtx (Pmode);
4106 nsse_reg = gen_reg_rtx (Pmode);
4107 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4108 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4109 gen_rtx_MULT (Pmode, nsse_reg,
4111 if (next_cum.sse_regno)
4114 gen_rtx_CONST (DImode,
4115 gen_rtx_PLUS (DImode,
4117 GEN_INT (next_cum.sse_regno * 4))));
4119 emit_move_insn (nsse_reg, label_ref);
4120 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4122 /* Compute address of memory block we save into. We always use pointer
4123 pointing 127 bytes after first byte to store - this is needed to keep
4124 instruction size limited by 4 bytes. */
4125 tmp_reg = gen_reg_rtx (Pmode);
4126 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4127 plus_constant (save_area,
4128 8 * REGPARM_MAX + 127)));
4129 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4130 MEM_NOTRAP_P (mem) = 1;
4131 set_mem_alias_set (mem, set);
4132 set_mem_align (mem, BITS_PER_WORD);
4134 /* And finally do the dirty job! */
4135 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4136 GEN_INT (next_cum.sse_regno), label));
4141 /* Implement va_start. */
4144 ix86_va_start (tree valist, rtx nextarg)
4146 HOST_WIDE_INT words, n_gpr, n_fpr;
4147 tree f_gpr, f_fpr, f_ovf, f_sav;
4148 tree gpr, fpr, ovf, sav, t;
4151 /* Only 64bit target needs something special. */
4154 std_expand_builtin_va_start (valist, nextarg);
4158 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4159 f_fpr = TREE_CHAIN (f_gpr);
4160 f_ovf = TREE_CHAIN (f_fpr);
4161 f_sav = TREE_CHAIN (f_ovf);
4163 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4164 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4165 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4166 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4167 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4169 /* Count number of gp and fp argument registers used. */
4170 words = current_function_args_info.words;
4171 n_gpr = current_function_args_info.regno;
4172 n_fpr = current_function_args_info.sse_regno;
4174 if (TARGET_DEBUG_ARG)
4175 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4176 (int) words, (int) n_gpr, (int) n_fpr);
4178 if (cfun->va_list_gpr_size)
4180 type = TREE_TYPE (gpr);
4181 t = build2 (MODIFY_EXPR, type, gpr,
4182 build_int_cst (type, n_gpr * 8));
4183 TREE_SIDE_EFFECTS (t) = 1;
4184 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4187 if (cfun->va_list_fpr_size)
4189 type = TREE_TYPE (fpr);
4190 t = build2 (MODIFY_EXPR, type, fpr,
4191 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4192 TREE_SIDE_EFFECTS (t) = 1;
4193 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4196 /* Find the overflow area. */
4197 type = TREE_TYPE (ovf);
4198 t = make_tree (type, virtual_incoming_args_rtx);
4200 t = build2 (PLUS_EXPR, type, t,
4201 build_int_cst (type, words * UNITS_PER_WORD));
4202 t = build2 (MODIFY_EXPR, type, ovf, t);
4203 TREE_SIDE_EFFECTS (t) = 1;
4204 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4206 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4208 /* Find the register save area.
4209 Prologue of the function save it right above stack frame. */
4210 type = TREE_TYPE (sav);
4211 t = make_tree (type, frame_pointer_rtx);
4212 t = build2 (MODIFY_EXPR, type, sav, t);
4213 TREE_SIDE_EFFECTS (t) = 1;
4214 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4218 /* Implement va_arg. */
4221 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4223 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4224 tree f_gpr, f_fpr, f_ovf, f_sav;
4225 tree gpr, fpr, ovf, sav, t;
4227 tree lab_false, lab_over = NULL_TREE;
4232 enum machine_mode nat_mode;
4234 /* Only 64bit target needs something special. */
4236 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4238 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4239 f_fpr = TREE_CHAIN (f_gpr);
4240 f_ovf = TREE_CHAIN (f_fpr);
4241 f_sav = TREE_CHAIN (f_ovf);
4243 valist = build_va_arg_indirect_ref (valist);
4244 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4245 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4246 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4247 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4249 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4251 type = build_pointer_type (type);
4252 size = int_size_in_bytes (type);
4253 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4255 nat_mode = type_natural_mode (type);
4256 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4257 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4259 /* Pull the value out of the saved registers. */
4261 addr = create_tmp_var (ptr_type_node, "addr");
4262 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4266 int needed_intregs, needed_sseregs;
4268 tree int_addr, sse_addr;
4270 lab_false = create_artificial_label ();
4271 lab_over = create_artificial_label ();
4273 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4275 need_temp = (!REG_P (container)
4276 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4277 || TYPE_ALIGN (type) > 128));
4279 /* In case we are passing structure, verify that it is consecutive block
4280 on the register save area. If not we need to do moves. */
4281 if (!need_temp && !REG_P (container))
4283 /* Verify that all registers are strictly consecutive */
4284 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4288 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4290 rtx slot = XVECEXP (container, 0, i);
4291 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4292 || INTVAL (XEXP (slot, 1)) != i * 16)
4300 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4302 rtx slot = XVECEXP (container, 0, i);
4303 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4304 || INTVAL (XEXP (slot, 1)) != i * 8)
4316 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4317 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4318 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4319 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4322 /* First ensure that we fit completely in registers. */
4325 t = build_int_cst (TREE_TYPE (gpr),
4326 (REGPARM_MAX - needed_intregs + 1) * 8);
4327 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4328 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4329 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4330 gimplify_and_add (t, pre_p);
4334 t = build_int_cst (TREE_TYPE (fpr),
4335 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4337 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4338 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4339 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4340 gimplify_and_add (t, pre_p);
4343 /* Compute index to start of area used for integer regs. */
4346 /* int_addr = gpr + sav; */
4347 t = fold_convert (ptr_type_node, gpr);
4348 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4349 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4350 gimplify_and_add (t, pre_p);
4354 /* sse_addr = fpr + sav; */
4355 t = fold_convert (ptr_type_node, fpr);
4356 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4357 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4358 gimplify_and_add (t, pre_p);
4363 tree temp = create_tmp_var (type, "va_arg_tmp");
4366 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4367 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4368 gimplify_and_add (t, pre_p);
4370 for (i = 0; i < XVECLEN (container, 0); i++)
4372 rtx slot = XVECEXP (container, 0, i);
4373 rtx reg = XEXP (slot, 0);
4374 enum machine_mode mode = GET_MODE (reg);
4375 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4376 tree addr_type = build_pointer_type (piece_type);
4379 tree dest_addr, dest;
4381 if (SSE_REGNO_P (REGNO (reg)))
4383 src_addr = sse_addr;
4384 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4388 src_addr = int_addr;
4389 src_offset = REGNO (reg) * 8;
4391 src_addr = fold_convert (addr_type, src_addr);
4392 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4393 size_int (src_offset)));
4394 src = build_va_arg_indirect_ref (src_addr);
4396 dest_addr = fold_convert (addr_type, addr);
4397 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4398 size_int (INTVAL (XEXP (slot, 1)))));
4399 dest = build_va_arg_indirect_ref (dest_addr);
4401 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4402 gimplify_and_add (t, pre_p);
4408 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4409 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4410 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4411 gimplify_and_add (t, pre_p);
4415 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4416 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4417 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4418 gimplify_and_add (t, pre_p);
4421 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4422 gimplify_and_add (t, pre_p);
4424 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4425 append_to_statement_list (t, pre_p);
4428 /* ... otherwise out of the overflow area. */
4430 /* Care for on-stack alignment if needed. */
4431 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4432 || integer_zerop (TYPE_SIZE (type)))
4436 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4437 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4438 build_int_cst (TREE_TYPE (ovf), align - 1));
4439 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4440 build_int_cst (TREE_TYPE (t), -align));
4442 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4444 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4445 gimplify_and_add (t2, pre_p);
4447 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4448 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4449 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4450 gimplify_and_add (t, pre_p);
4454 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4455 append_to_statement_list (t, pre_p);
4458 ptrtype = build_pointer_type (type);
4459 addr = fold_convert (ptrtype, addr);
4462 addr = build_va_arg_indirect_ref (addr);
4463 return build_va_arg_indirect_ref (addr);
4466 /* Return nonzero if OPNUM's MEM should be matched
4467 in movabs* patterns. */
4470 ix86_check_movabs (rtx insn, int opnum)
4474 set = PATTERN (insn);
4475 if (GET_CODE (set) == PARALLEL)
4476 set = XVECEXP (set, 0, 0);
4477 gcc_assert (GET_CODE (set) == SET);
4478 mem = XEXP (set, opnum);
4479 while (GET_CODE (mem) == SUBREG)
4480 mem = SUBREG_REG (mem);
4481 gcc_assert (GET_CODE (mem) == MEM);
4482 return (volatile_ok || !MEM_VOLATILE_P (mem));
4485 /* Initialize the table of extra 80387 mathematical constants. */
4488 init_ext_80387_constants (void)
4490 static const char * cst[5] =
4492 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4493 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4494 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4495 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4496 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4500 for (i = 0; i < 5; i++)
4502 real_from_string (&ext_80387_constants_table[i], cst[i]);
4503 /* Ensure each constant is rounded to XFmode precision. */
4504 real_convert (&ext_80387_constants_table[i],
4505 XFmode, &ext_80387_constants_table[i]);
4508 ext_80387_constants_init = 1;
4511 /* Return true if the constant is something that can be loaded with
4512 a special instruction. */
4515 standard_80387_constant_p (rtx x)
4517 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4520 if (x == CONST0_RTX (GET_MODE (x)))
4522 if (x == CONST1_RTX (GET_MODE (x)))
4525 /* For XFmode constants, try to find a special 80387 instruction when
4526 optimizing for size or on those CPUs that benefit from them. */
4527 if (GET_MODE (x) == XFmode
4528 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4533 if (! ext_80387_constants_init)
4534 init_ext_80387_constants ();
4536 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4537 for (i = 0; i < 5; i++)
4538 if (real_identical (&r, &ext_80387_constants_table[i]))
4545 /* Return the opcode of the special instruction to be used to load
4549 standard_80387_constant_opcode (rtx x)
4551 switch (standard_80387_constant_p (x))
4572 /* Return the CONST_DOUBLE representing the 80387 constant that is
4573 loaded by the specified special instruction. The argument IDX
4574 matches the return value from standard_80387_constant_p. */
4577 standard_80387_constant_rtx (int idx)
4581 if (! ext_80387_constants_init)
4582 init_ext_80387_constants ();
4598 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4602 /* Return 1 if mode is a valid mode for sse. */
4604 standard_sse_mode_p (enum machine_mode mode)
4621 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4624 standard_sse_constant_p (rtx x)
4626 enum machine_mode mode = GET_MODE (x);
4628 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4630 if (vector_all_ones_operand (x, mode)
4631 && standard_sse_mode_p (mode))
4632 return TARGET_SSE2 ? 2 : -1;
4637 /* Return the opcode of the special instruction to be used to load
4641 standard_sse_constant_opcode (rtx insn, rtx x)
4643 switch (standard_sse_constant_p (x))
4646 if (get_attr_mode (insn) == MODE_V4SF)
4647 return "xorps\t%0, %0";
4648 else if (get_attr_mode (insn) == MODE_V2DF)
4649 return "xorpd\t%0, %0";
4651 return "pxor\t%0, %0";
4653 return "pcmpeqd\t%0, %0";
4658 /* Returns 1 if OP contains a symbol reference */
4661 symbolic_reference_mentioned_p (rtx op)
4666 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4669 fmt = GET_RTX_FORMAT (GET_CODE (op));
4670 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4676 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4677 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4681 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4688 /* Return 1 if it is appropriate to emit `ret' instructions in the
4689 body of a function. Do this only if the epilogue is simple, needing a
4690 couple of insns. Prior to reloading, we can't tell how many registers
4691 must be saved, so return 0 then. Return 0 if there is no frame
4692 marker to de-allocate. */
4695 ix86_can_use_return_insn_p (void)
4697 struct ix86_frame frame;
4699 if (! reload_completed || frame_pointer_needed)
4702 /* Don't allow more than 32 pop, since that's all we can do
4703 with one instruction. */
4704 if (current_function_pops_args
4705 && current_function_args_size >= 32768)
4708 ix86_compute_frame_layout (&frame);
4709 return frame.to_allocate == 0 && frame.nregs == 0;
4712 /* Value should be nonzero if functions must have frame pointers.
4713 Zero means the frame pointer need not be set up (and parms may
4714 be accessed via the stack pointer) in functions that seem suitable. */
4717 ix86_frame_pointer_required (void)
4719 /* If we accessed previous frames, then the generated code expects
4720 to be able to access the saved ebp value in our frame. */
4721 if (cfun->machine->accesses_prev_frame)
4724 /* Several x86 os'es need a frame pointer for other reasons,
4725 usually pertaining to setjmp. */
4726 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4729 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4730 the frame pointer by default. Turn it back on now if we've not
4731 got a leaf function. */
4732 if (TARGET_OMIT_LEAF_FRAME_POINTER
4733 && (!current_function_is_leaf
4734 || ix86_current_function_calls_tls_descriptor))
4737 if (current_function_profile)
4743 /* Record that the current function accesses previous call frames. */
4746 ix86_setup_frame_addresses (void)
4748 cfun->machine->accesses_prev_frame = 1;
4751 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4752 # define USE_HIDDEN_LINKONCE 1
4754 # define USE_HIDDEN_LINKONCE 0
4757 static int pic_labels_used;
4759 /* Fills in the label name that should be used for a pc thunk for
4760 the given register. */
4763 get_pc_thunk_name (char name[32], unsigned int regno)
4765 gcc_assert (!TARGET_64BIT);
4767 if (USE_HIDDEN_LINKONCE)
4768 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4770 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4774 /* This function generates code for -fpic that loads %ebx with
4775 the return address of the caller and then returns. */
4778 ix86_file_end (void)
4783 for (regno = 0; regno < 8; ++regno)
4787 if (! ((pic_labels_used >> regno) & 1))
4790 get_pc_thunk_name (name, regno);
4795 switch_to_section (darwin_sections[text_coal_section]);
4796 fputs ("\t.weak_definition\t", asm_out_file);
4797 assemble_name (asm_out_file, name);
4798 fputs ("\n\t.private_extern\t", asm_out_file);
4799 assemble_name (asm_out_file, name);
4800 fputs ("\n", asm_out_file);
4801 ASM_OUTPUT_LABEL (asm_out_file, name);
4805 if (USE_HIDDEN_LINKONCE)
4809 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4811 TREE_PUBLIC (decl) = 1;
4812 TREE_STATIC (decl) = 1;
4813 DECL_ONE_ONLY (decl) = 1;
4815 (*targetm.asm_out.unique_section) (decl, 0);
4816 switch_to_section (get_named_section (decl, NULL, 0));
4818 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4819 fputs ("\t.hidden\t", asm_out_file);
4820 assemble_name (asm_out_file, name);
4821 fputc ('\n', asm_out_file);
4822 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4826 switch_to_section (text_section);
4827 ASM_OUTPUT_LABEL (asm_out_file, name);
4830 xops[0] = gen_rtx_REG (SImode, regno);
4831 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4832 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4833 output_asm_insn ("ret", xops);
4836 if (NEED_INDICATE_EXEC_STACK)
4837 file_end_indicate_exec_stack ();
4840 /* Emit code for the SET_GOT patterns. */
4843 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4848 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4850 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4852 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4855 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4857 output_asm_insn ("call\t%a2", xops);
4860 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4861 is what will be referenced by the Mach-O PIC subsystem. */
4863 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4866 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4867 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4870 output_asm_insn ("pop{l}\t%0", xops);
4875 get_pc_thunk_name (name, REGNO (dest));
4876 pic_labels_used |= 1 << REGNO (dest);
4878 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4879 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4880 output_asm_insn ("call\t%X2", xops);
4881 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4882 is what will be referenced by the Mach-O PIC subsystem. */
4885 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4887 targetm.asm_out.internal_label (asm_out_file, "L",
4888 CODE_LABEL_NUMBER (label));
4895 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4896 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4898 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4903 /* Generate an "push" pattern for input ARG. */
4908 return gen_rtx_SET (VOIDmode,
4910 gen_rtx_PRE_DEC (Pmode,
4911 stack_pointer_rtx)),
4915 /* Return >= 0 if there is an unused call-clobbered register available
4916 for the entire function. */
4919 ix86_select_alt_pic_regnum (void)
4921 if (current_function_is_leaf && !current_function_profile
4922 && !ix86_current_function_calls_tls_descriptor)
4925 for (i = 2; i >= 0; --i)
4926 if (!regs_ever_live[i])
4930 return INVALID_REGNUM;
4933 /* Return 1 if we need to save REGNO. */
4935 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4937 if (pic_offset_table_rtx
4938 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4939 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4940 || current_function_profile
4941 || current_function_calls_eh_return
4942 || current_function_uses_const_pool))
4944 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4949 if (current_function_calls_eh_return && maybe_eh_return)
4954 unsigned test = EH_RETURN_DATA_REGNO (i);
4955 if (test == INVALID_REGNUM)
4962 if (cfun->machine->force_align_arg_pointer
4963 && regno == REGNO (cfun->machine->force_align_arg_pointer))
4966 return (regs_ever_live[regno]
4967 && !call_used_regs[regno]
4968 && !fixed_regs[regno]
4969 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4972 /* Return number of registers to be saved on the stack. */
4975 ix86_nsaved_regs (void)
4980 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4981 if (ix86_save_reg (regno, true))
4986 /* Return the offset between two registers, one to be eliminated, and the other
4987 its replacement, at the start of a routine. */
4990 ix86_initial_elimination_offset (int from, int to)
4992 struct ix86_frame frame;
4993 ix86_compute_frame_layout (&frame);
4995 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4996 return frame.hard_frame_pointer_offset;
4997 else if (from == FRAME_POINTER_REGNUM
4998 && to == HARD_FRAME_POINTER_REGNUM)
4999 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5002 gcc_assert (to == STACK_POINTER_REGNUM);
5004 if (from == ARG_POINTER_REGNUM)
5005 return frame.stack_pointer_offset;
5007 gcc_assert (from == FRAME_POINTER_REGNUM);
5008 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5012 /* Fill structure ix86_frame about frame of currently computed function. */
5015 ix86_compute_frame_layout (struct ix86_frame *frame)
5017 HOST_WIDE_INT total_size;
5018 unsigned int stack_alignment_needed;
5019 HOST_WIDE_INT offset;
5020 unsigned int preferred_alignment;
5021 HOST_WIDE_INT size = get_frame_size ();
5023 frame->nregs = ix86_nsaved_regs ();
5026 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5027 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5029 /* During reload iteration the amount of registers saved can change.
5030 Recompute the value as needed. Do not recompute when amount of registers
5031 didn't change as reload does multiple calls to the function and does not
5032 expect the decision to change within single iteration. */
5034 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5036 int count = frame->nregs;
5038 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5039 /* The fast prologue uses move instead of push to save registers. This
5040 is significantly longer, but also executes faster as modern hardware
5041 can execute the moves in parallel, but can't do that for push/pop.
5043 Be careful about choosing what prologue to emit: When function takes
5044 many instructions to execute we may use slow version as well as in
5045 case function is known to be outside hot spot (this is known with
5046 feedback only). Weight the size of function by number of registers
5047 to save as it is cheap to use one or two push instructions but very
5048 slow to use many of them. */
5050 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5051 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5052 || (flag_branch_probabilities
5053 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5054 cfun->machine->use_fast_prologue_epilogue = false;
5056 cfun->machine->use_fast_prologue_epilogue
5057 = !expensive_function_p (count);
5059 if (TARGET_PROLOGUE_USING_MOVE
5060 && cfun->machine->use_fast_prologue_epilogue)
5061 frame->save_regs_using_mov = true;
5063 frame->save_regs_using_mov = false;
5066 /* Skip return address and saved base pointer. */
5067 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5069 frame->hard_frame_pointer_offset = offset;
5071 /* Do some sanity checking of stack_alignment_needed and
5072 preferred_alignment, since i386 port is the only using those features
5073 that may break easily. */
5075 gcc_assert (!size || stack_alignment_needed);
5076 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5077 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5078 gcc_assert (stack_alignment_needed
5079 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5081 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5082 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5084 /* Register save area */
5085 offset += frame->nregs * UNITS_PER_WORD;
5088 if (ix86_save_varrargs_registers)
5090 offset += X86_64_VARARGS_SIZE;
5091 frame->va_arg_size = X86_64_VARARGS_SIZE;
5094 frame->va_arg_size = 0;
5096 /* Align start of frame for local function. */
5097 frame->padding1 = ((offset + stack_alignment_needed - 1)
5098 & -stack_alignment_needed) - offset;
5100 offset += frame->padding1;
5102 /* Frame pointer points here. */
5103 frame->frame_pointer_offset = offset;
5107 /* Add outgoing arguments area. Can be skipped if we eliminated
5108 all the function calls as dead code.
5109 Skipping is however impossible when function calls alloca. Alloca
5110 expander assumes that last current_function_outgoing_args_size
5111 of stack frame are unused. */
5112 if (ACCUMULATE_OUTGOING_ARGS
5113 && (!current_function_is_leaf || current_function_calls_alloca
5114 || ix86_current_function_calls_tls_descriptor))
5116 offset += current_function_outgoing_args_size;
5117 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5120 frame->outgoing_arguments_size = 0;
5122 /* Align stack boundary. Only needed if we're calling another function
5124 if (!current_function_is_leaf || current_function_calls_alloca
5125 || ix86_current_function_calls_tls_descriptor)
5126 frame->padding2 = ((offset + preferred_alignment - 1)
5127 & -preferred_alignment) - offset;
5129 frame->padding2 = 0;
5131 offset += frame->padding2;
5133 /* We've reached end of stack frame. */
5134 frame->stack_pointer_offset = offset;
5136 /* Size prologue needs to allocate. */
5137 frame->to_allocate =
5138 (size + frame->padding1 + frame->padding2
5139 + frame->outgoing_arguments_size + frame->va_arg_size);
5141 if ((!frame->to_allocate && frame->nregs <= 1)
5142 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5143 frame->save_regs_using_mov = false;
5145 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5146 && current_function_is_leaf
5147 && !ix86_current_function_calls_tls_descriptor)
5149 frame->red_zone_size = frame->to_allocate;
5150 if (frame->save_regs_using_mov)
5151 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5152 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5153 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5156 frame->red_zone_size = 0;
5157 frame->to_allocate -= frame->red_zone_size;
5158 frame->stack_pointer_offset -= frame->red_zone_size;
5160 fprintf (stderr, "nregs: %i\n", frame->nregs);
5161 fprintf (stderr, "size: %i\n", size);
5162 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5163 fprintf (stderr, "padding1: %i\n", frame->padding1);
5164 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5165 fprintf (stderr, "padding2: %i\n", frame->padding2);
5166 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5167 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5168 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5169 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5170 frame->hard_frame_pointer_offset);
5171 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5175 /* Emit code to save registers in the prologue. */
5178 ix86_emit_save_regs (void)
5183 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5184 if (ix86_save_reg (regno, true))
5186 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5187 RTX_FRAME_RELATED_P (insn) = 1;
5191 /* Emit code to save registers using MOV insns. First register
5192 is restored from POINTER + OFFSET. */
5194 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5199 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5200 if (ix86_save_reg (regno, true))
5202 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5204 gen_rtx_REG (Pmode, regno));
5205 RTX_FRAME_RELATED_P (insn) = 1;
5206 offset += UNITS_PER_WORD;
5210 /* Expand prologue or epilogue stack adjustment.
5211 The pattern exist to put a dependency on all ebp-based memory accesses.
5212 STYLE should be negative if instructions should be marked as frame related,
5213 zero if %r11 register is live and cannot be freely used and positive
5217 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5222 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5223 else if (x86_64_immediate_operand (offset, DImode))
5224 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5228 /* r11 is used by indirect sibcall return as well, set before the
5229 epilogue and used after the epilogue. ATM indirect sibcall
5230 shouldn't be used together with huge frame sizes in one
5231 function because of the frame_size check in sibcall.c. */
5233 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5234 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5236 RTX_FRAME_RELATED_P (insn) = 1;
5237 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5241 RTX_FRAME_RELATED_P (insn) = 1;
5244 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5247 ix86_internal_arg_pointer (void)
5249 bool has_force_align_arg_pointer =
5250 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5251 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5252 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5253 && DECL_NAME (current_function_decl)
5254 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5255 && DECL_FILE_SCOPE_P (current_function_decl))
5256 || ix86_force_align_arg_pointer
5257 || has_force_align_arg_pointer)
5259 /* Nested functions can't realign the stack due to a register
5261 if (DECL_CONTEXT (current_function_decl)
5262 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5264 if (ix86_force_align_arg_pointer)
5265 warning (0, "-mstackrealign ignored for nested functions");
5266 if (has_force_align_arg_pointer)
5267 error ("%s not supported for nested functions",
5268 ix86_force_align_arg_pointer_string);
5269 return virtual_incoming_args_rtx;
5271 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5272 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5275 return virtual_incoming_args_rtx;
5278 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5279 This is called from dwarf2out.c to emit call frame instructions
5280 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5282 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5284 rtx unspec = SET_SRC (pattern);
5285 gcc_assert (GET_CODE (unspec) == UNSPEC);
5289 case UNSPEC_REG_SAVE:
5290 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5291 SET_DEST (pattern));
5293 case UNSPEC_DEF_CFA:
5294 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5295 INTVAL (XVECEXP (unspec, 0, 0)));
5302 /* Expand the prologue into a bunch of separate insns. */
5305 ix86_expand_prologue (void)
5309 struct ix86_frame frame;
5310 HOST_WIDE_INT allocate;
5312 ix86_compute_frame_layout (&frame);
5314 if (cfun->machine->force_align_arg_pointer)
5318 /* Grab the argument pointer. */
5319 x = plus_constant (stack_pointer_rtx, 4);
5320 y = cfun->machine->force_align_arg_pointer;
5321 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5322 RTX_FRAME_RELATED_P (insn) = 1;
5324 /* The unwind info consists of two parts: install the fafp as the cfa,
5325 and record the fafp as the "save register" of the stack pointer.
5326 The later is there in order that the unwinder can see where it
5327 should restore the stack pointer across the and insn. */
5328 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5329 x = gen_rtx_SET (VOIDmode, y, x);
5330 RTX_FRAME_RELATED_P (x) = 1;
5331 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5333 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5334 RTX_FRAME_RELATED_P (y) = 1;
5335 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5336 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5337 REG_NOTES (insn) = x;
5339 /* Align the stack. */
5340 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5343 /* And here we cheat like madmen with the unwind info. We force the
5344 cfa register back to sp+4, which is exactly what it was at the
5345 start of the function. Re-pushing the return address results in
5346 the return at the same spot relative to the cfa, and thus is
5347 correct wrt the unwind info. */
5348 x = cfun->machine->force_align_arg_pointer;
5349 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5350 insn = emit_insn (gen_push (x));
5351 RTX_FRAME_RELATED_P (insn) = 1;
5354 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5355 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5356 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5357 REG_NOTES (insn) = x;
5360 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5361 slower on all targets. Also sdb doesn't like it. */
5363 if (frame_pointer_needed)
5365 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5366 RTX_FRAME_RELATED_P (insn) = 1;
5368 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5369 RTX_FRAME_RELATED_P (insn) = 1;
5372 allocate = frame.to_allocate;
5374 if (!frame.save_regs_using_mov)
5375 ix86_emit_save_regs ();
5377 allocate += frame.nregs * UNITS_PER_WORD;
5379 /* When using red zone we may start register saving before allocating
5380 the stack frame saving one cycle of the prologue. */
5381 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5382 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5383 : stack_pointer_rtx,
5384 -frame.nregs * UNITS_PER_WORD);
5388 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5389 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5390 GEN_INT (-allocate), -1);
5393 /* Only valid for Win32. */
5394 rtx eax = gen_rtx_REG (SImode, 0);
5395 bool eax_live = ix86_eax_live_at_start_p ();
5398 gcc_assert (!TARGET_64BIT);
5402 emit_insn (gen_push (eax));
5406 emit_move_insn (eax, GEN_INT (allocate));
5408 insn = emit_insn (gen_allocate_stack_worker (eax));
5409 RTX_FRAME_RELATED_P (insn) = 1;
5410 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5411 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5412 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5413 t, REG_NOTES (insn));
5417 if (frame_pointer_needed)
5418 t = plus_constant (hard_frame_pointer_rtx,
5421 - frame.nregs * UNITS_PER_WORD);
5423 t = plus_constant (stack_pointer_rtx, allocate);
5424 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5428 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5430 if (!frame_pointer_needed || !frame.to_allocate)
5431 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5433 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5434 -frame.nregs * UNITS_PER_WORD);
5437 pic_reg_used = false;
5438 if (pic_offset_table_rtx
5439 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5440 || current_function_profile))
5442 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5444 if (alt_pic_reg_used != INVALID_REGNUM)
5445 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5447 pic_reg_used = true;
5453 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5455 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5457 /* Even with accurate pre-reload life analysis, we can wind up
5458 deleting all references to the pic register after reload.
5459 Consider if cross-jumping unifies two sides of a branch
5460 controlled by a comparison vs the only read from a global.
5461 In which case, allow the set_got to be deleted, though we're
5462 too late to do anything about the ebx save in the prologue. */
5463 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5466 /* Prevent function calls from be scheduled before the call to mcount.
5467 In the pic_reg_used case, make sure that the got load isn't deleted. */
5468 if (current_function_profile)
5469 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5472 /* Emit code to restore saved registers using MOV insns. First register
5473 is restored from POINTER + OFFSET. */
5475 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5476 int maybe_eh_return)
5479 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5481 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5482 if (ix86_save_reg (regno, maybe_eh_return))
5484 /* Ensure that adjust_address won't be forced to produce pointer
5485 out of range allowed by x86-64 instruction set. */
5486 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5490 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5491 emit_move_insn (r11, GEN_INT (offset));
5492 emit_insn (gen_adddi3 (r11, r11, pointer));
5493 base_address = gen_rtx_MEM (Pmode, r11);
5496 emit_move_insn (gen_rtx_REG (Pmode, regno),
5497 adjust_address (base_address, Pmode, offset));
5498 offset += UNITS_PER_WORD;
5502 /* Restore function stack, frame, and registers. */
5505 ix86_expand_epilogue (int style)
5508 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5509 struct ix86_frame frame;
5510 HOST_WIDE_INT offset;
5512 ix86_compute_frame_layout (&frame);
5514 /* Calculate start of saved registers relative to ebp. Special care
5515 must be taken for the normal return case of a function using
5516 eh_return: the eax and edx registers are marked as saved, but not
5517 restored along this path. */
5518 offset = frame.nregs;
5519 if (current_function_calls_eh_return && style != 2)
5521 offset *= -UNITS_PER_WORD;
5523 /* If we're only restoring one register and sp is not valid then
5524 using a move instruction to restore the register since it's
5525 less work than reloading sp and popping the register.
5527 The default code result in stack adjustment using add/lea instruction,
5528 while this code results in LEAVE instruction (or discrete equivalent),
5529 so it is profitable in some other cases as well. Especially when there
5530 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5531 and there is exactly one register to pop. This heuristic may need some
5532 tuning in future. */
5533 if ((!sp_valid && frame.nregs <= 1)
5534 || (TARGET_EPILOGUE_USING_MOVE
5535 && cfun->machine->use_fast_prologue_epilogue
5536 && (frame.nregs > 1 || frame.to_allocate))
5537 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5538 || (frame_pointer_needed && TARGET_USE_LEAVE
5539 && cfun->machine->use_fast_prologue_epilogue
5540 && frame.nregs == 1)
5541 || current_function_calls_eh_return)
5543 /* Restore registers. We can use ebp or esp to address the memory
5544 locations. If both are available, default to ebp, since offsets
5545 are known to be small. Only exception is esp pointing directly to the
5546 end of block of saved registers, where we may simplify addressing
5549 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5550 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5551 frame.to_allocate, style == 2);
5553 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5554 offset, style == 2);
5556 /* eh_return epilogues need %ecx added to the stack pointer. */
5559 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5561 if (frame_pointer_needed)
5563 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5564 tmp = plus_constant (tmp, UNITS_PER_WORD);
5565 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5567 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5568 emit_move_insn (hard_frame_pointer_rtx, tmp);
5570 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5575 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5576 tmp = plus_constant (tmp, (frame.to_allocate
5577 + frame.nregs * UNITS_PER_WORD));
5578 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5581 else if (!frame_pointer_needed)
5582 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5583 GEN_INT (frame.to_allocate
5584 + frame.nregs * UNITS_PER_WORD),
5586 /* If not an i386, mov & pop is faster than "leave". */
5587 else if (TARGET_USE_LEAVE || optimize_size
5588 || !cfun->machine->use_fast_prologue_epilogue)
5589 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5592 pro_epilogue_adjust_stack (stack_pointer_rtx,
5593 hard_frame_pointer_rtx,
5596 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5598 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5603 /* First step is to deallocate the stack frame so that we can
5604 pop the registers. */
5607 gcc_assert (frame_pointer_needed);
5608 pro_epilogue_adjust_stack (stack_pointer_rtx,
5609 hard_frame_pointer_rtx,
5610 GEN_INT (offset), style);
5612 else if (frame.to_allocate)
5613 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5614 GEN_INT (frame.to_allocate), style);
5616 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5617 if (ix86_save_reg (regno, false))
5620 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5622 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5624 if (frame_pointer_needed)
5626 /* Leave results in shorter dependency chains on CPUs that are
5627 able to grok it fast. */
5628 if (TARGET_USE_LEAVE)
5629 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5630 else if (TARGET_64BIT)
5631 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5633 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5637 if (cfun->machine->force_align_arg_pointer)
5639 emit_insn (gen_addsi3 (stack_pointer_rtx,
5640 cfun->machine->force_align_arg_pointer,
5644 /* Sibcall epilogues don't want a return instruction. */
5648 if (current_function_pops_args && current_function_args_size)
5650 rtx popc = GEN_INT (current_function_pops_args);
5652 /* i386 can only pop 64K bytes. If asked to pop more, pop
5653 return address, do explicit add, and jump indirectly to the
5656 if (current_function_pops_args >= 65536)
5658 rtx ecx = gen_rtx_REG (SImode, 2);
5660 /* There is no "pascal" calling convention in 64bit ABI. */
5661 gcc_assert (!TARGET_64BIT);
5663 emit_insn (gen_popsi1 (ecx));
5664 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5665 emit_jump_insn (gen_return_indirect_internal (ecx));
5668 emit_jump_insn (gen_return_pop_internal (popc));
5671 emit_jump_insn (gen_return_internal ());
5674 /* Reset from the function's potential modifications. */
5677 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5678 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5680 if (pic_offset_table_rtx)
5681 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5683 /* Mach-O doesn't support labels at the end of objects, so if
5684 it looks like we might want one, insert a NOP. */
5686 rtx insn = get_last_insn ();
5689 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5690 insn = PREV_INSN (insn);
5694 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5695 fputs ("\tnop\n", file);
5701 /* Extract the parts of an RTL expression that is a valid memory address
5702 for an instruction. Return 0 if the structure of the address is
5703 grossly off. Return -1 if the address contains ASHIFT, so it is not
5704 strictly valid, but still used for computing length of lea instruction. */
5707 ix86_decompose_address (rtx addr, struct ix86_address *out)
5709 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5710 rtx base_reg, index_reg;
5711 HOST_WIDE_INT scale = 1;
5712 rtx scale_rtx = NULL_RTX;
5714 enum ix86_address_seg seg = SEG_DEFAULT;
5716 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5718 else if (GET_CODE (addr) == PLUS)
5728 addends[n++] = XEXP (op, 1);
5731 while (GET_CODE (op) == PLUS);
5736 for (i = n; i >= 0; --i)
5739 switch (GET_CODE (op))
5744 index = XEXP (op, 0);
5745 scale_rtx = XEXP (op, 1);
5749 if (XINT (op, 1) == UNSPEC_TP
5750 && TARGET_TLS_DIRECT_SEG_REFS
5751 && seg == SEG_DEFAULT)
5752 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5781 else if (GET_CODE (addr) == MULT)
5783 index = XEXP (addr, 0); /* index*scale */
5784 scale_rtx = XEXP (addr, 1);
5786 else if (GET_CODE (addr) == ASHIFT)
5790 /* We're called for lea too, which implements ashift on occasion. */
5791 index = XEXP (addr, 0);
5792 tmp = XEXP (addr, 1);
5793 if (GET_CODE (tmp) != CONST_INT)
5795 scale = INTVAL (tmp);
5796 if ((unsigned HOST_WIDE_INT) scale > 3)
5802 disp = addr; /* displacement */
5804 /* Extract the integral value of scale. */
5807 if (GET_CODE (scale_rtx) != CONST_INT)
5809 scale = INTVAL (scale_rtx);
5812 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5813 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5815 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5816 if (base_reg && index_reg && scale == 1
5817 && (index_reg == arg_pointer_rtx
5818 || index_reg == frame_pointer_rtx
5819 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5822 tmp = base, base = index, index = tmp;
5823 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5826 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5827 if ((base_reg == hard_frame_pointer_rtx
5828 || base_reg == frame_pointer_rtx
5829 || base_reg == arg_pointer_rtx) && !disp)
5832 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5833 Avoid this by transforming to [%esi+0]. */
5834 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5835 && base_reg && !index_reg && !disp
5837 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5840 /* Special case: encode reg+reg instead of reg*2. */
5841 if (!base && index && scale && scale == 2)
5842 base = index, base_reg = index_reg, scale = 1;
5844 /* Special case: scaling cannot be encoded without base or displacement. */
5845 if (!base && !disp && index && scale != 1)
5857 /* Return cost of the memory address x.
5858 For i386, it is better to use a complex address than let gcc copy
5859 the address into a reg and make a new pseudo. But not if the address
5860 requires to two regs - that would mean more pseudos with longer
5863 ix86_address_cost (rtx x)
5865 struct ix86_address parts;
5867 int ok = ix86_decompose_address (x, &parts);
5871 if (parts.base && GET_CODE (parts.base) == SUBREG)
5872 parts.base = SUBREG_REG (parts.base);
5873 if (parts.index && GET_CODE (parts.index) == SUBREG)
5874 parts.index = SUBREG_REG (parts.index);
5876 /* More complex memory references are better. */
5877 if (parts.disp && parts.disp != const0_rtx)
5879 if (parts.seg != SEG_DEFAULT)
5882 /* Attempt to minimize number of registers in the address. */
5884 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5886 && (!REG_P (parts.index)
5887 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5891 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5893 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5894 && parts.base != parts.index)
5897 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5898 since it's predecode logic can't detect the length of instructions
5899 and it degenerates to vector decoded. Increase cost of such
5900 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5901 to split such addresses or even refuse such addresses at all.
5903 Following addressing modes are affected:
5908 The first and last case may be avoidable by explicitly coding the zero in
5909 memory address, but I don't have AMD-K6 machine handy to check this
5913 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5914 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5915 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5921 /* If X is a machine specific address (i.e. a symbol or label being
5922 referenced as a displacement from the GOT implemented using an
5923 UNSPEC), then return the base term. Otherwise return X. */
5926 ix86_find_base_term (rtx x)
5932 if (GET_CODE (x) != CONST)
5935 if (GET_CODE (term) == PLUS
5936 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5937 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5938 term = XEXP (term, 0);
5939 if (GET_CODE (term) != UNSPEC
5940 || XINT (term, 1) != UNSPEC_GOTPCREL)
5943 term = XVECEXP (term, 0, 0);
5945 if (GET_CODE (term) != SYMBOL_REF
5946 && GET_CODE (term) != LABEL_REF)
5952 term = ix86_delegitimize_address (x);
5954 if (GET_CODE (term) != SYMBOL_REF
5955 && GET_CODE (term) != LABEL_REF)
5961 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5962 this is used for to form addresses to local data when -fPIC is in
5966 darwin_local_data_pic (rtx disp)
5968 if (GET_CODE (disp) == MINUS)
5970 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5971 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5972 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5974 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5975 if (! strcmp (sym_name, "<pic base>"))
5983 /* Determine if a given RTX is a valid constant. We already know this
5984 satisfies CONSTANT_P. */
5987 legitimate_constant_p (rtx x)
5989 switch (GET_CODE (x))
5994 if (GET_CODE (x) == PLUS)
5996 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6001 if (TARGET_MACHO && darwin_local_data_pic (x))
6004 /* Only some unspecs are valid as "constants". */
6005 if (GET_CODE (x) == UNSPEC)
6006 switch (XINT (x, 1))
6009 return TARGET_64BIT;
6012 x = XVECEXP (x, 0, 0);
6013 return (GET_CODE (x) == SYMBOL_REF
6014 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6016 x = XVECEXP (x, 0, 0);
6017 return (GET_CODE (x) == SYMBOL_REF
6018 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6023 /* We must have drilled down to a symbol. */
6024 if (GET_CODE (x) == LABEL_REF)
6026 if (GET_CODE (x) != SYMBOL_REF)
6031 /* TLS symbols are never valid. */
6032 if (SYMBOL_REF_TLS_MODEL (x))
6037 if (GET_MODE (x) == TImode
6038 && x != CONST0_RTX (TImode)
6044 if (x == CONST0_RTX (GET_MODE (x)))
6052 /* Otherwise we handle everything else in the move patterns. */
6056 /* Determine if it's legal to put X into the constant pool. This
6057 is not possible for the address of thread-local symbols, which
6058 is checked above. */
6061 ix86_cannot_force_const_mem (rtx x)
6063 /* We can always put integral constants and vectors in memory. */
6064 switch (GET_CODE (x))
6074 return !legitimate_constant_p (x);
6077 /* Determine if a given RTX is a valid constant address. */
6080 constant_address_p (rtx x)
6082 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6085 /* Nonzero if the constant value X is a legitimate general operand
6086 when generating PIC code. It is given that flag_pic is on and
6087 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6090 legitimate_pic_operand_p (rtx x)
6094 switch (GET_CODE (x))
6097 inner = XEXP (x, 0);
6098 if (GET_CODE (inner) == PLUS
6099 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6100 inner = XEXP (inner, 0);
6102 /* Only some unspecs are valid as "constants". */
6103 if (GET_CODE (inner) == UNSPEC)
6104 switch (XINT (inner, 1))
6107 return TARGET_64BIT;
6109 x = XVECEXP (inner, 0, 0);
6110 return (GET_CODE (x) == SYMBOL_REF
6111 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6119 return legitimate_pic_address_disp_p (x);
6126 /* Determine if a given CONST RTX is a valid memory displacement
6130 legitimate_pic_address_disp_p (rtx disp)
6134 /* In 64bit mode we can allow direct addresses of symbols and labels
6135 when they are not dynamic symbols. */
6138 rtx op0 = disp, op1;
6140 switch (GET_CODE (disp))
6146 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6148 op0 = XEXP (XEXP (disp, 0), 0);
6149 op1 = XEXP (XEXP (disp, 0), 1);
6150 if (GET_CODE (op1) != CONST_INT
6151 || INTVAL (op1) >= 16*1024*1024
6152 || INTVAL (op1) < -16*1024*1024)
6154 if (GET_CODE (op0) == LABEL_REF)
6156 if (GET_CODE (op0) != SYMBOL_REF)
6161 /* TLS references should always be enclosed in UNSPEC. */
6162 if (SYMBOL_REF_TLS_MODEL (op0))
6164 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6172 if (GET_CODE (disp) != CONST)
6174 disp = XEXP (disp, 0);
6178 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6179 of GOT tables. We should not need these anyway. */
6180 if (GET_CODE (disp) != UNSPEC
6181 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6182 && XINT (disp, 1) != UNSPEC_GOTOFF))
6185 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6186 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6192 if (GET_CODE (disp) == PLUS)
6194 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6196 disp = XEXP (disp, 0);
6200 if (TARGET_MACHO && darwin_local_data_pic (disp))
6203 if (GET_CODE (disp) != UNSPEC)
6206 switch (XINT (disp, 1))
6211 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6213 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6214 While ABI specify also 32bit relocation but we don't produce it in
6215 small PIC model at all. */
6216 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6217 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6219 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6221 case UNSPEC_GOTTPOFF:
6222 case UNSPEC_GOTNTPOFF:
6223 case UNSPEC_INDNTPOFF:
6226 disp = XVECEXP (disp, 0, 0);
6227 return (GET_CODE (disp) == SYMBOL_REF
6228 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6230 disp = XVECEXP (disp, 0, 0);
6231 return (GET_CODE (disp) == SYMBOL_REF
6232 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6234 disp = XVECEXP (disp, 0, 0);
6235 return (GET_CODE (disp) == SYMBOL_REF
6236 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6242 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6243 memory address for an instruction. The MODE argument is the machine mode
6244 for the MEM expression that wants to use this address.
6246 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6247 convert common non-canonical forms to canonical form so that they will
6251 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6253 struct ix86_address parts;
6254 rtx base, index, disp;
6255 HOST_WIDE_INT scale;
6256 const char *reason = NULL;
6257 rtx reason_rtx = NULL_RTX;
6259 if (TARGET_DEBUG_ADDR)
6262 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6263 GET_MODE_NAME (mode), strict);
6267 if (ix86_decompose_address (addr, &parts) <= 0)
6269 reason = "decomposition failed";
6274 index = parts.index;
6276 scale = parts.scale;
6278 /* Validate base register.
6280 Don't allow SUBREG's that span more than a word here. It can lead to spill
6281 failures when the base is one word out of a two word structure, which is
6282 represented internally as a DImode int. */
6291 else if (GET_CODE (base) == SUBREG
6292 && REG_P (SUBREG_REG (base))
6293 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6295 reg = SUBREG_REG (base);
6298 reason = "base is not a register";
6302 if (GET_MODE (base) != Pmode)
6304 reason = "base is not in Pmode";
6308 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6309 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6311 reason = "base is not valid";
6316 /* Validate index register.
6318 Don't allow SUBREG's that span more than a word here -- same as above. */
6327 else if (GET_CODE (index) == SUBREG
6328 && REG_P (SUBREG_REG (index))
6329 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6331 reg = SUBREG_REG (index);
6334 reason = "index is not a register";
6338 if (GET_MODE (index) != Pmode)
6340 reason = "index is not in Pmode";
6344 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6345 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6347 reason = "index is not valid";
6352 /* Validate scale factor. */
6355 reason_rtx = GEN_INT (scale);
6358 reason = "scale without index";
6362 if (scale != 2 && scale != 4 && scale != 8)
6364 reason = "scale is not a valid multiplier";
6369 /* Validate displacement. */
6374 if (GET_CODE (disp) == CONST
6375 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6376 switch (XINT (XEXP (disp, 0), 1))
6378 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6379 used. While ABI specify also 32bit relocations, we don't produce
6380 them at all and use IP relative instead. */
6383 gcc_assert (flag_pic);
6385 goto is_legitimate_pic;
6386 reason = "64bit address unspec";
6389 case UNSPEC_GOTPCREL:
6390 gcc_assert (flag_pic);
6391 goto is_legitimate_pic;
6393 case UNSPEC_GOTTPOFF:
6394 case UNSPEC_GOTNTPOFF:
6395 case UNSPEC_INDNTPOFF:
6401 reason = "invalid address unspec";
6405 else if (SYMBOLIC_CONST (disp)
6409 && MACHOPIC_INDIRECT
6410 && !machopic_operand_p (disp)
6416 if (TARGET_64BIT && (index || base))
6418 /* foo@dtpoff(%rX) is ok. */
6419 if (GET_CODE (disp) != CONST
6420 || GET_CODE (XEXP (disp, 0)) != PLUS
6421 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6422 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6423 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6424 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6426 reason = "non-constant pic memory reference";
6430 else if (! legitimate_pic_address_disp_p (disp))
6432 reason = "displacement is an invalid pic construct";
6436 /* This code used to verify that a symbolic pic displacement
6437 includes the pic_offset_table_rtx register.
6439 While this is good idea, unfortunately these constructs may
6440 be created by "adds using lea" optimization for incorrect
6449 This code is nonsensical, but results in addressing
6450 GOT table with pic_offset_table_rtx base. We can't
6451 just refuse it easily, since it gets matched by
6452 "addsi3" pattern, that later gets split to lea in the
6453 case output register differs from input. While this
6454 can be handled by separate addsi pattern for this case
6455 that never results in lea, this seems to be easier and
6456 correct fix for crash to disable this test. */
6458 else if (GET_CODE (disp) != LABEL_REF
6459 && GET_CODE (disp) != CONST_INT
6460 && (GET_CODE (disp) != CONST
6461 || !legitimate_constant_p (disp))
6462 && (GET_CODE (disp) != SYMBOL_REF
6463 || !legitimate_constant_p (disp)))
6465 reason = "displacement is not constant";
6468 else if (TARGET_64BIT
6469 && !x86_64_immediate_operand (disp, VOIDmode))
6471 reason = "displacement is out of range";
6476 /* Everything looks valid. */
6477 if (TARGET_DEBUG_ADDR)
6478 fprintf (stderr, "Success.\n");
6482 if (TARGET_DEBUG_ADDR)
6484 fprintf (stderr, "Error: %s\n", reason);
6485 debug_rtx (reason_rtx);
6490 /* Return a unique alias set for the GOT. */
6492 static HOST_WIDE_INT
6493 ix86_GOT_alias_set (void)
6495 static HOST_WIDE_INT set = -1;
6497 set = new_alias_set ();
6501 /* Return a legitimate reference for ORIG (an address) using the
6502 register REG. If REG is 0, a new pseudo is generated.
6504 There are two types of references that must be handled:
6506 1. Global data references must load the address from the GOT, via
6507 the PIC reg. An insn is emitted to do this load, and the reg is
6510 2. Static data references, constant pool addresses, and code labels
6511 compute the address as an offset from the GOT, whose base is in
6512 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6513 differentiate them from global data objects. The returned
6514 address is the PIC reg + an unspec constant.
6516 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6517 reg also appears in the address. */
6520 legitimize_pic_address (rtx orig, rtx reg)
6527 if (TARGET_MACHO && !TARGET_64BIT)
6530 reg = gen_reg_rtx (Pmode);
6531 /* Use the generic Mach-O PIC machinery. */
6532 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6536 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6538 else if (TARGET_64BIT
6539 && ix86_cmodel != CM_SMALL_PIC
6540 && local_symbolic_operand (addr, Pmode))
6543 /* This symbol may be referenced via a displacement from the PIC
6544 base address (@GOTOFF). */
6546 if (reload_in_progress)
6547 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6548 if (GET_CODE (addr) == CONST)
6549 addr = XEXP (addr, 0);
6550 if (GET_CODE (addr) == PLUS)
6552 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6553 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6556 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6557 new = gen_rtx_CONST (Pmode, new);
6559 tmpreg = gen_reg_rtx (Pmode);
6562 emit_move_insn (tmpreg, new);
6566 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6567 tmpreg, 1, OPTAB_DIRECT);
6570 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6572 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6574 /* This symbol may be referenced via a displacement from the PIC
6575 base address (@GOTOFF). */
6577 if (reload_in_progress)
6578 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6579 if (GET_CODE (addr) == CONST)
6580 addr = XEXP (addr, 0);
6581 if (GET_CODE (addr) == PLUS)
6583 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6584 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6587 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6588 new = gen_rtx_CONST (Pmode, new);
6589 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6593 emit_move_insn (reg, new);
6597 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6601 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6602 new = gen_rtx_CONST (Pmode, new);
6603 new = gen_const_mem (Pmode, new);
6604 set_mem_alias_set (new, ix86_GOT_alias_set ());
6607 reg = gen_reg_rtx (Pmode);
6608 /* Use directly gen_movsi, otherwise the address is loaded
6609 into register for CSE. We don't want to CSE this addresses,
6610 instead we CSE addresses from the GOT table, so skip this. */
6611 emit_insn (gen_movsi (reg, new));
6616 /* This symbol must be referenced via a load from the
6617 Global Offset Table (@GOT). */
6619 if (reload_in_progress)
6620 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6621 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6622 new = gen_rtx_CONST (Pmode, new);
6623 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6624 new = gen_const_mem (Pmode, new);
6625 set_mem_alias_set (new, ix86_GOT_alias_set ());
6628 reg = gen_reg_rtx (Pmode);
6629 emit_move_insn (reg, new);
6635 if (GET_CODE (addr) == CONST_INT
6636 && !x86_64_immediate_operand (addr, VOIDmode))
6640 emit_move_insn (reg, addr);
6644 new = force_reg (Pmode, addr);
6646 else if (GET_CODE (addr) == CONST)
6648 addr = XEXP (addr, 0);
6650 /* We must match stuff we generate before. Assume the only
6651 unspecs that can get here are ours. Not that we could do
6652 anything with them anyway.... */
6653 if (GET_CODE (addr) == UNSPEC
6654 || (GET_CODE (addr) == PLUS
6655 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6657 gcc_assert (GET_CODE (addr) == PLUS);
6659 if (GET_CODE (addr) == PLUS)
6661 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6663 /* Check first to see if this is a constant offset from a @GOTOFF
6664 symbol reference. */
6665 if (local_symbolic_operand (op0, Pmode)
6666 && GET_CODE (op1) == CONST_INT)
6670 if (reload_in_progress)
6671 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6672 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6674 new = gen_rtx_PLUS (Pmode, new, op1);
6675 new = gen_rtx_CONST (Pmode, new);
6676 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6680 emit_move_insn (reg, new);
6686 if (INTVAL (op1) < -16*1024*1024
6687 || INTVAL (op1) >= 16*1024*1024)
6689 if (!x86_64_immediate_operand (op1, Pmode))
6690 op1 = force_reg (Pmode, op1);
6691 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6697 base = legitimize_pic_address (XEXP (addr, 0), reg);
6698 new = legitimize_pic_address (XEXP (addr, 1),
6699 base == reg ? NULL_RTX : reg);
6701 if (GET_CODE (new) == CONST_INT)
6702 new = plus_constant (base, INTVAL (new));
6705 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6707 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6708 new = XEXP (new, 1);
6710 new = gen_rtx_PLUS (Pmode, base, new);
6718 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6721 get_thread_pointer (int to_reg)
6725 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6729 reg = gen_reg_rtx (Pmode);
6730 insn = gen_rtx_SET (VOIDmode, reg, tp);
6731 insn = emit_insn (insn);
6736 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6737 false if we expect this to be used for a memory address and true if
6738 we expect to load the address into a register. */
6741 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6743 rtx dest, base, off, pic, tp;
6748 case TLS_MODEL_GLOBAL_DYNAMIC:
6749 dest = gen_reg_rtx (Pmode);
6750 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6752 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6754 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6757 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6758 insns = get_insns ();
6761 emit_libcall_block (insns, dest, rax, x);
6763 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6764 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6766 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6768 if (TARGET_GNU2_TLS)
6770 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6772 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6776 case TLS_MODEL_LOCAL_DYNAMIC:
6777 base = gen_reg_rtx (Pmode);
6778 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6780 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6782 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6785 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6786 insns = get_insns ();
6789 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6790 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6791 emit_libcall_block (insns, base, rax, note);
6793 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6794 emit_insn (gen_tls_local_dynamic_base_64 (base));
6796 emit_insn (gen_tls_local_dynamic_base_32 (base));
6798 if (TARGET_GNU2_TLS)
6800 rtx x = ix86_tls_module_base ();
6802 set_unique_reg_note (get_last_insn (), REG_EQUIV,
6803 gen_rtx_MINUS (Pmode, x, tp));
6806 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6807 off = gen_rtx_CONST (Pmode, off);
6809 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6811 if (TARGET_GNU2_TLS)
6813 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6815 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6820 case TLS_MODEL_INITIAL_EXEC:
6824 type = UNSPEC_GOTNTPOFF;
6828 if (reload_in_progress)
6829 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6830 pic = pic_offset_table_rtx;
6831 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6833 else if (!TARGET_ANY_GNU_TLS)
6835 pic = gen_reg_rtx (Pmode);
6836 emit_insn (gen_set_got (pic));
6837 type = UNSPEC_GOTTPOFF;
6842 type = UNSPEC_INDNTPOFF;
6845 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6846 off = gen_rtx_CONST (Pmode, off);
6848 off = gen_rtx_PLUS (Pmode, pic, off);
6849 off = gen_const_mem (Pmode, off);
6850 set_mem_alias_set (off, ix86_GOT_alias_set ());
6852 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6854 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6855 off = force_reg (Pmode, off);
6856 return gen_rtx_PLUS (Pmode, base, off);
6860 base = get_thread_pointer (true);
6861 dest = gen_reg_rtx (Pmode);
6862 emit_insn (gen_subsi3 (dest, base, off));
6866 case TLS_MODEL_LOCAL_EXEC:
6867 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6868 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6869 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6870 off = gen_rtx_CONST (Pmode, off);
6872 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6874 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6875 return gen_rtx_PLUS (Pmode, base, off);
6879 base = get_thread_pointer (true);
6880 dest = gen_reg_rtx (Pmode);
6881 emit_insn (gen_subsi3 (dest, base, off));
6892 /* Try machine-dependent ways of modifying an illegitimate address
6893 to be legitimate. If we find one, return the new, valid address.
6894 This macro is used in only one place: `memory_address' in explow.c.
6896 OLDX is the address as it was before break_out_memory_refs was called.
6897 In some cases it is useful to look at this to decide what needs to be done.
6899 MODE and WIN are passed so that this macro can use
6900 GO_IF_LEGITIMATE_ADDRESS.
6902 It is always safe for this macro to do nothing. It exists to recognize
6903 opportunities to optimize the output.
6905 For the 80386, we handle X+REG by loading X into a register R and
6906 using R+REG. R will go in a general reg and indexing will be used.
6907 However, if REG is a broken-out memory address or multiplication,
6908 nothing needs to be done because REG can certainly go in a general reg.
6910 When -fpic is used, special handling is needed for symbolic references.
6911 See comments by legitimize_pic_address in i386.c for details. */
6914 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6919 if (TARGET_DEBUG_ADDR)
6921 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6922 GET_MODE_NAME (mode));
6926 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6928 return legitimize_tls_address (x, log, false);
6929 if (GET_CODE (x) == CONST
6930 && GET_CODE (XEXP (x, 0)) == PLUS
6931 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6932 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6934 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6935 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6938 if (flag_pic && SYMBOLIC_CONST (x))
6939 return legitimize_pic_address (x, 0);
6941 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6942 if (GET_CODE (x) == ASHIFT
6943 && GET_CODE (XEXP (x, 1)) == CONST_INT
6944 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6947 log = INTVAL (XEXP (x, 1));
6948 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6949 GEN_INT (1 << log));
6952 if (GET_CODE (x) == PLUS)
6954 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6956 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6957 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6958 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6961 log = INTVAL (XEXP (XEXP (x, 0), 1));
6962 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6963 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6964 GEN_INT (1 << log));
6967 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6968 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6969 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
6972 log = INTVAL (XEXP (XEXP (x, 1), 1));
6973 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6974 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6975 GEN_INT (1 << log));
6978 /* Put multiply first if it isn't already. */
6979 if (GET_CODE (XEXP (x, 1)) == MULT)
6981 rtx tmp = XEXP (x, 0);
6982 XEXP (x, 0) = XEXP (x, 1);
6987 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6988 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6989 created by virtual register instantiation, register elimination, and
6990 similar optimizations. */
6991 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6994 x = gen_rtx_PLUS (Pmode,
6995 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6996 XEXP (XEXP (x, 1), 0)),
6997 XEXP (XEXP (x, 1), 1));
7001 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7002 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7003 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7004 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7005 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7006 && CONSTANT_P (XEXP (x, 1)))
7009 rtx other = NULL_RTX;
7011 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7013 constant = XEXP (x, 1);
7014 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7016 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7018 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7019 other = XEXP (x, 1);
7027 x = gen_rtx_PLUS (Pmode,
7028 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7029 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7030 plus_constant (other, INTVAL (constant)));
7034 if (changed && legitimate_address_p (mode, x, FALSE))
7037 if (GET_CODE (XEXP (x, 0)) == MULT)
7040 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7043 if (GET_CODE (XEXP (x, 1)) == MULT)
7046 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7050 && GET_CODE (XEXP (x, 1)) == REG
7051 && GET_CODE (XEXP (x, 0)) == REG)
7054 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7057 x = legitimize_pic_address (x, 0);
7060 if (changed && legitimate_address_p (mode, x, FALSE))
7063 if (GET_CODE (XEXP (x, 0)) == REG)
7065 rtx temp = gen_reg_rtx (Pmode);
7066 rtx val = force_operand (XEXP (x, 1), temp);
7068 emit_move_insn (temp, val);
7074 else if (GET_CODE (XEXP (x, 1)) == REG)
7076 rtx temp = gen_reg_rtx (Pmode);
7077 rtx val = force_operand (XEXP (x, 0), temp);
7079 emit_move_insn (temp, val);
7089 /* Print an integer constant expression in assembler syntax. Addition
7090 and subtraction are the only arithmetic that may appear in these
7091 expressions. FILE is the stdio stream to write to, X is the rtx, and
7092 CODE is the operand print code from the output string. */
7095 output_pic_addr_const (FILE *file, rtx x, int code)
7099 switch (GET_CODE (x))
7102 gcc_assert (flag_pic);
7107 output_addr_const (file, x);
7108 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7109 fputs ("@PLT", file);
7116 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7117 assemble_name (asm_out_file, buf);
7121 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7125 /* This used to output parentheses around the expression,
7126 but that does not work on the 386 (either ATT or BSD assembler). */
7127 output_pic_addr_const (file, XEXP (x, 0), code);
7131 if (GET_MODE (x) == VOIDmode)
7133 /* We can use %d if the number is <32 bits and positive. */
7134 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7135 fprintf (file, "0x%lx%08lx",
7136 (unsigned long) CONST_DOUBLE_HIGH (x),
7137 (unsigned long) CONST_DOUBLE_LOW (x));
7139 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7142 /* We can't handle floating point constants;
7143 PRINT_OPERAND must handle them. */
7144 output_operand_lossage ("floating constant misused");
7148 /* Some assemblers need integer constants to appear first. */
7149 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7151 output_pic_addr_const (file, XEXP (x, 0), code);
7153 output_pic_addr_const (file, XEXP (x, 1), code);
7157 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7158 output_pic_addr_const (file, XEXP (x, 1), code);
7160 output_pic_addr_const (file, XEXP (x, 0), code);
7166 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7167 output_pic_addr_const (file, XEXP (x, 0), code);
7169 output_pic_addr_const (file, XEXP (x, 1), code);
7171 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7175 gcc_assert (XVECLEN (x, 0) == 1);
7176 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7177 switch (XINT (x, 1))
7180 fputs ("@GOT", file);
7183 fputs ("@GOTOFF", file);
7185 case UNSPEC_GOTPCREL:
7186 fputs ("@GOTPCREL(%rip)", file);
7188 case UNSPEC_GOTTPOFF:
7189 /* FIXME: This might be @TPOFF in Sun ld too. */
7190 fputs ("@GOTTPOFF", file);
7193 fputs ("@TPOFF", file);
7197 fputs ("@TPOFF", file);
7199 fputs ("@NTPOFF", file);
7202 fputs ("@DTPOFF", file);
7204 case UNSPEC_GOTNTPOFF:
7206 fputs ("@GOTTPOFF(%rip)", file);
7208 fputs ("@GOTNTPOFF", file);
7210 case UNSPEC_INDNTPOFF:
7211 fputs ("@INDNTPOFF", file);
7214 output_operand_lossage ("invalid UNSPEC as operand");
7220 output_operand_lossage ("invalid expression as operand");
7224 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7225 We need to emit DTP-relative relocations. */
7228 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7230 fputs (ASM_LONG, file);
7231 output_addr_const (file, x);
7232 fputs ("@DTPOFF", file);
7238 fputs (", 0", file);
7245 /* In the name of slightly smaller debug output, and to cater to
7246 general assembler lossage, recognize PIC+GOTOFF and turn it back
7247 into a direct symbol reference.
7249 On Darwin, this is necessary to avoid a crash, because Darwin
7250 has a different PIC label for each routine but the DWARF debugging
7251 information is not associated with any particular routine, so it's
7252 necessary to remove references to the PIC label from RTL stored by
7253 the DWARF output code. */
7256 ix86_delegitimize_address (rtx orig_x)
7259 /* reg_addend is NULL or a multiple of some register. */
7260 rtx reg_addend = NULL_RTX;
7261 /* const_addend is NULL or a const_int. */
7262 rtx const_addend = NULL_RTX;
7263 /* This is the result, or NULL. */
7264 rtx result = NULL_RTX;
7266 if (GET_CODE (x) == MEM)
7271 if (GET_CODE (x) != CONST
7272 || GET_CODE (XEXP (x, 0)) != UNSPEC
7273 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7274 || GET_CODE (orig_x) != MEM)
7276 return XVECEXP (XEXP (x, 0), 0, 0);
7279 if (GET_CODE (x) != PLUS
7280 || GET_CODE (XEXP (x, 1)) != CONST)
7283 if (GET_CODE (XEXP (x, 0)) == REG
7284 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7285 /* %ebx + GOT/GOTOFF */
7287 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7289 /* %ebx + %reg * scale + GOT/GOTOFF */
7290 reg_addend = XEXP (x, 0);
7291 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7292 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7293 reg_addend = XEXP (reg_addend, 1);
7294 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7295 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7296 reg_addend = XEXP (reg_addend, 0);
7299 if (GET_CODE (reg_addend) != REG
7300 && GET_CODE (reg_addend) != MULT
7301 && GET_CODE (reg_addend) != ASHIFT)
7307 x = XEXP (XEXP (x, 1), 0);
7308 if (GET_CODE (x) == PLUS
7309 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7311 const_addend = XEXP (x, 1);
7315 if (GET_CODE (x) == UNSPEC
7316 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7317 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7318 result = XVECEXP (x, 0, 0);
7320 if (TARGET_MACHO && darwin_local_data_pic (x)
7321 && GET_CODE (orig_x) != MEM)
7322 result = XEXP (x, 0);
7328 result = gen_rtx_PLUS (Pmode, result, const_addend);
7330 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7335 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7340 if (mode == CCFPmode || mode == CCFPUmode)
7342 enum rtx_code second_code, bypass_code;
7343 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7344 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7345 code = ix86_fp_compare_code_to_integer (code);
7349 code = reverse_condition (code);
7360 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7364 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7365 Those same assemblers have the same but opposite lossage on cmov. */
7366 gcc_assert (mode == CCmode);
7367 suffix = fp ? "nbe" : "a";
7387 gcc_assert (mode == CCmode);
7409 gcc_assert (mode == CCmode);
7410 suffix = fp ? "nb" : "ae";
7413 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7417 gcc_assert (mode == CCmode);
7421 suffix = fp ? "u" : "p";
7424 suffix = fp ? "nu" : "np";
7429 fputs (suffix, file);
7432 /* Print the name of register X to FILE based on its machine mode and number.
7433 If CODE is 'w', pretend the mode is HImode.
7434 If CODE is 'b', pretend the mode is QImode.
7435 If CODE is 'k', pretend the mode is SImode.
7436 If CODE is 'q', pretend the mode is DImode.
7437 If CODE is 'h', pretend the reg is the 'high' byte register.
7438 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7441 print_reg (rtx x, int code, FILE *file)
7443 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7444 && REGNO (x) != FRAME_POINTER_REGNUM
7445 && REGNO (x) != FLAGS_REG
7446 && REGNO (x) != FPSR_REG);
7448 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7451 if (code == 'w' || MMX_REG_P (x))
7453 else if (code == 'b')
7455 else if (code == 'k')
7457 else if (code == 'q')
7459 else if (code == 'y')
7461 else if (code == 'h')
7464 code = GET_MODE_SIZE (GET_MODE (x));
7466 /* Irritatingly, AMD extended registers use different naming convention
7467 from the normal registers. */
7468 if (REX_INT_REG_P (x))
7470 gcc_assert (TARGET_64BIT);
7474 error ("extended registers have no high halves");
7477 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7480 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7483 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7486 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7489 error ("unsupported operand size for extended register");
7497 if (STACK_TOP_P (x))
7499 fputs ("st(0)", file);
7506 if (! ANY_FP_REG_P (x))
7507 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7512 fputs (hi_reg_name[REGNO (x)], file);
7515 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7517 fputs (qi_reg_name[REGNO (x)], file);
7520 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7522 fputs (qi_high_reg_name[REGNO (x)], file);
7529 /* Locate some local-dynamic symbol still in use by this function
7530 so that we can print its name in some tls_local_dynamic_base
7534 get_some_local_dynamic_name (void)
7538 if (cfun->machine->some_ld_name)
7539 return cfun->machine->some_ld_name;
7541 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7543 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7544 return cfun->machine->some_ld_name;
7550 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7554 if (GET_CODE (x) == SYMBOL_REF
7555 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7557 cfun->machine->some_ld_name = XSTR (x, 0);
7565 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7566 C -- print opcode suffix for set/cmov insn.
7567 c -- like C, but print reversed condition
7568 F,f -- likewise, but for floating-point.
7569 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7571 R -- print the prefix for register names.
7572 z -- print the opcode suffix for the size of the current operand.
7573 * -- print a star (in certain assembler syntax)
7574 A -- print an absolute memory reference.
7575 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7576 s -- print a shift double count, followed by the assemblers argument
7578 b -- print the QImode name of the register for the indicated operand.
7579 %b0 would print %al if operands[0] is reg 0.
7580 w -- likewise, print the HImode name of the register.
7581 k -- likewise, print the SImode name of the register.
7582 q -- likewise, print the DImode name of the register.
7583 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7584 y -- print "st(0)" instead of "st" as a register.
7585 D -- print condition for SSE cmp instruction.
7586 P -- if PIC, print an @PLT suffix.
7587 X -- don't print any sort of PIC '@' suffix for a symbol.
7588 & -- print some in-use local-dynamic symbol name.
7589 H -- print a memory address offset by 8; used for sse high-parts
7593 print_operand (FILE *file, rtx x, int code)
7600 if (ASSEMBLER_DIALECT == ASM_ATT)
7605 assemble_name (file, get_some_local_dynamic_name ());
7609 switch (ASSEMBLER_DIALECT)
7616 /* Intel syntax. For absolute addresses, registers should not
7617 be surrounded by braces. */
7618 if (GET_CODE (x) != REG)
7621 PRINT_OPERAND (file, x, 0);
7631 PRINT_OPERAND (file, x, 0);
7636 if (ASSEMBLER_DIALECT == ASM_ATT)
7641 if (ASSEMBLER_DIALECT == ASM_ATT)
7646 if (ASSEMBLER_DIALECT == ASM_ATT)
7651 if (ASSEMBLER_DIALECT == ASM_ATT)
7656 if (ASSEMBLER_DIALECT == ASM_ATT)
7661 if (ASSEMBLER_DIALECT == ASM_ATT)
7666 /* 387 opcodes don't get size suffixes if the operands are
7668 if (STACK_REG_P (x))
7671 /* Likewise if using Intel opcodes. */
7672 if (ASSEMBLER_DIALECT == ASM_INTEL)
7675 /* This is the size of op from size of operand. */
7676 switch (GET_MODE_SIZE (GET_MODE (x)))
7679 #ifdef HAVE_GAS_FILDS_FISTS
7685 if (GET_MODE (x) == SFmode)
7700 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7702 #ifdef GAS_MNEMONICS
7728 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7730 PRINT_OPERAND (file, x, 0);
7736 /* Little bit of braindamage here. The SSE compare instructions
7737 does use completely different names for the comparisons that the
7738 fp conditional moves. */
7739 switch (GET_CODE (x))
7754 fputs ("unord", file);
7758 fputs ("neq", file);
7762 fputs ("nlt", file);
7766 fputs ("nle", file);
7769 fputs ("ord", file);
7776 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7777 if (ASSEMBLER_DIALECT == ASM_ATT)
7779 switch (GET_MODE (x))
7781 case HImode: putc ('w', file); break;
7783 case SFmode: putc ('l', file); break;
7785 case DFmode: putc ('q', file); break;
7786 default: gcc_unreachable ();
7793 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7796 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7797 if (ASSEMBLER_DIALECT == ASM_ATT)
7800 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7803 /* Like above, but reverse condition */
7805 /* Check to see if argument to %c is really a constant
7806 and not a condition code which needs to be reversed. */
7807 if (!COMPARISON_P (x))
7809 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7812 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7815 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7816 if (ASSEMBLER_DIALECT == ASM_ATT)
7819 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7823 /* It doesn't actually matter what mode we use here, as we're
7824 only going to use this for printing. */
7825 x = adjust_address_nv (x, DImode, 8);
7832 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7835 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7838 int pred_val = INTVAL (XEXP (x, 0));
7840 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7841 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7843 int taken = pred_val > REG_BR_PROB_BASE / 2;
7844 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7846 /* Emit hints only in the case default branch prediction
7847 heuristics would fail. */
7848 if (taken != cputaken)
7850 /* We use 3e (DS) prefix for taken branches and
7851 2e (CS) prefix for not taken branches. */
7853 fputs ("ds ; ", file);
7855 fputs ("cs ; ", file);
7862 output_operand_lossage ("invalid operand code '%c'", code);
7866 if (GET_CODE (x) == REG)
7867 print_reg (x, code, file);
7869 else if (GET_CODE (x) == MEM)
7871 /* No `byte ptr' prefix for call instructions. */
7872 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7875 switch (GET_MODE_SIZE (GET_MODE (x)))
7877 case 1: size = "BYTE"; break;
7878 case 2: size = "WORD"; break;
7879 case 4: size = "DWORD"; break;
7880 case 8: size = "QWORD"; break;
7881 case 12: size = "XWORD"; break;
7882 case 16: size = "XMMWORD"; break;
7887 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7890 else if (code == 'w')
7892 else if (code == 'k')
7896 fputs (" PTR ", file);
7900 /* Avoid (%rip) for call operands. */
7901 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7902 && GET_CODE (x) != CONST_INT)
7903 output_addr_const (file, x);
7904 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7905 output_operand_lossage ("invalid constraints for operand");
7910 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7915 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7916 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7918 if (ASSEMBLER_DIALECT == ASM_ATT)
7920 fprintf (file, "0x%08lx", l);
7923 /* These float cases don't actually occur as immediate operands. */
7924 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7928 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7929 fprintf (file, "%s", dstr);
7932 else if (GET_CODE (x) == CONST_DOUBLE
7933 && GET_MODE (x) == XFmode)
7937 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7938 fprintf (file, "%s", dstr);
7943 /* We have patterns that allow zero sets of memory, for instance.
7944 In 64-bit mode, we should probably support all 8-byte vectors,
7945 since we can in fact encode that into an immediate. */
7946 if (GET_CODE (x) == CONST_VECTOR)
7948 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
7954 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7956 if (ASSEMBLER_DIALECT == ASM_ATT)
7959 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7960 || GET_CODE (x) == LABEL_REF)
7962 if (ASSEMBLER_DIALECT == ASM_ATT)
7965 fputs ("OFFSET FLAT:", file);
7968 if (GET_CODE (x) == CONST_INT)
7969 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7971 output_pic_addr_const (file, x, code);
7973 output_addr_const (file, x);
7977 /* Print a memory operand whose address is ADDR. */
7980 print_operand_address (FILE *file, rtx addr)
7982 struct ix86_address parts;
7983 rtx base, index, disp;
7985 int ok = ix86_decompose_address (addr, &parts);
7990 index = parts.index;
7992 scale = parts.scale;
8000 if (USER_LABEL_PREFIX[0] == 0)
8002 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8008 if (!base && !index)
8010 /* Displacement only requires special attention. */
8012 if (GET_CODE (disp) == CONST_INT)
8014 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8016 if (USER_LABEL_PREFIX[0] == 0)
8018 fputs ("ds:", file);
8020 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8023 output_pic_addr_const (file, disp, 0);
8025 output_addr_const (file, disp);
8027 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8030 if (GET_CODE (disp) == CONST
8031 && GET_CODE (XEXP (disp, 0)) == PLUS
8032 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8033 disp = XEXP (XEXP (disp, 0), 0);
8034 if (GET_CODE (disp) == LABEL_REF
8035 || (GET_CODE (disp) == SYMBOL_REF
8036 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8037 fputs ("(%rip)", file);
8042 if (ASSEMBLER_DIALECT == ASM_ATT)
8047 output_pic_addr_const (file, disp, 0);
8048 else if (GET_CODE (disp) == LABEL_REF)
8049 output_asm_label (disp);
8051 output_addr_const (file, disp);
8056 print_reg (base, 0, file);
8060 print_reg (index, 0, file);
8062 fprintf (file, ",%d", scale);
8068 rtx offset = NULL_RTX;
8072 /* Pull out the offset of a symbol; print any symbol itself. */
8073 if (GET_CODE (disp) == CONST
8074 && GET_CODE (XEXP (disp, 0)) == PLUS
8075 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8077 offset = XEXP (XEXP (disp, 0), 1);
8078 disp = gen_rtx_CONST (VOIDmode,
8079 XEXP (XEXP (disp, 0), 0));
8083 output_pic_addr_const (file, disp, 0);
8084 else if (GET_CODE (disp) == LABEL_REF)
8085 output_asm_label (disp);
8086 else if (GET_CODE (disp) == CONST_INT)
8089 output_addr_const (file, disp);
8095 print_reg (base, 0, file);
8098 if (INTVAL (offset) >= 0)
8100 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8104 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8111 print_reg (index, 0, file);
8113 fprintf (file, "*%d", scale);
8121 output_addr_const_extra (FILE *file, rtx x)
8125 if (GET_CODE (x) != UNSPEC)
8128 op = XVECEXP (x, 0, 0);
8129 switch (XINT (x, 1))
8131 case UNSPEC_GOTTPOFF:
8132 output_addr_const (file, op);
8133 /* FIXME: This might be @TPOFF in Sun ld. */
8134 fputs ("@GOTTPOFF", file);
8137 output_addr_const (file, op);
8138 fputs ("@TPOFF", file);
8141 output_addr_const (file, op);
8143 fputs ("@TPOFF", file);
8145 fputs ("@NTPOFF", file);
8148 output_addr_const (file, op);
8149 fputs ("@DTPOFF", file);
8151 case UNSPEC_GOTNTPOFF:
8152 output_addr_const (file, op);
8154 fputs ("@GOTTPOFF(%rip)", file);
8156 fputs ("@GOTNTPOFF", file);
8158 case UNSPEC_INDNTPOFF:
8159 output_addr_const (file, op);
8160 fputs ("@INDNTPOFF", file);
8170 /* Split one or more DImode RTL references into pairs of SImode
8171 references. The RTL can be REG, offsettable MEM, integer constant, or
8172 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8173 split and "num" is its length. lo_half and hi_half are output arrays
8174 that parallel "operands". */
8177 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8181 rtx op = operands[num];
8183 /* simplify_subreg refuse to split volatile memory addresses,
8184 but we still have to handle it. */
8185 if (GET_CODE (op) == MEM)
8187 lo_half[num] = adjust_address (op, SImode, 0);
8188 hi_half[num] = adjust_address (op, SImode, 4);
8192 lo_half[num] = simplify_gen_subreg (SImode, op,
8193 GET_MODE (op) == VOIDmode
8194 ? DImode : GET_MODE (op), 0);
8195 hi_half[num] = simplify_gen_subreg (SImode, op,
8196 GET_MODE (op) == VOIDmode
8197 ? DImode : GET_MODE (op), 4);
8201 /* Split one or more TImode RTL references into pairs of DImode
8202 references. The RTL can be REG, offsettable MEM, integer constant, or
8203 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8204 split and "num" is its length. lo_half and hi_half are output arrays
8205 that parallel "operands". */
8208 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8212 rtx op = operands[num];
8214 /* simplify_subreg refuse to split volatile memory addresses, but we
8215 still have to handle it. */
8216 if (GET_CODE (op) == MEM)
8218 lo_half[num] = adjust_address (op, DImode, 0);
8219 hi_half[num] = adjust_address (op, DImode, 8);
8223 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8224 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8229 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8230 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8231 is the expression of the binary operation. The output may either be
8232 emitted here, or returned to the caller, like all output_* functions.
8234 There is no guarantee that the operands are the same mode, as they
8235 might be within FLOAT or FLOAT_EXTEND expressions. */
8237 #ifndef SYSV386_COMPAT
8238 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8239 wants to fix the assemblers because that causes incompatibility
8240 with gcc. No-one wants to fix gcc because that causes
8241 incompatibility with assemblers... You can use the option of
8242 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8243 #define SYSV386_COMPAT 1
8247 output_387_binary_op (rtx insn, rtx *operands)
8249 static char buf[30];
8252 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8254 #ifdef ENABLE_CHECKING
8255 /* Even if we do not want to check the inputs, this documents input
8256 constraints. Which helps in understanding the following code. */
8257 if (STACK_REG_P (operands[0])
8258 && ((REG_P (operands[1])
8259 && REGNO (operands[0]) == REGNO (operands[1])
8260 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8261 || (REG_P (operands[2])
8262 && REGNO (operands[0]) == REGNO (operands[2])
8263 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8264 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8267 gcc_assert (is_sse);
8270 switch (GET_CODE (operands[3]))
8273 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8274 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8282 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8283 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8291 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8292 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8300 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8301 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8315 if (GET_MODE (operands[0]) == SFmode)
8316 strcat (buf, "ss\t{%2, %0|%0, %2}");
8318 strcat (buf, "sd\t{%2, %0|%0, %2}");
8323 switch (GET_CODE (operands[3]))
8327 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8329 rtx temp = operands[2];
8330 operands[2] = operands[1];
8334 /* know operands[0] == operands[1]. */
8336 if (GET_CODE (operands[2]) == MEM)
8342 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8344 if (STACK_TOP_P (operands[0]))
8345 /* How is it that we are storing to a dead operand[2]?
8346 Well, presumably operands[1] is dead too. We can't
8347 store the result to st(0) as st(0) gets popped on this
8348 instruction. Instead store to operands[2] (which I
8349 think has to be st(1)). st(1) will be popped later.
8350 gcc <= 2.8.1 didn't have this check and generated
8351 assembly code that the Unixware assembler rejected. */
8352 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8354 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8358 if (STACK_TOP_P (operands[0]))
8359 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8361 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8366 if (GET_CODE (operands[1]) == MEM)
8372 if (GET_CODE (operands[2]) == MEM)
8378 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8381 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8382 derived assemblers, confusingly reverse the direction of
8383 the operation for fsub{r} and fdiv{r} when the
8384 destination register is not st(0). The Intel assembler
8385 doesn't have this brain damage. Read !SYSV386_COMPAT to
8386 figure out what the hardware really does. */
8387 if (STACK_TOP_P (operands[0]))
8388 p = "{p\t%0, %2|rp\t%2, %0}";
8390 p = "{rp\t%2, %0|p\t%0, %2}";
8392 if (STACK_TOP_P (operands[0]))
8393 /* As above for fmul/fadd, we can't store to st(0). */
8394 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8396 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8401 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8404 if (STACK_TOP_P (operands[0]))
8405 p = "{rp\t%0, %1|p\t%1, %0}";
8407 p = "{p\t%1, %0|rp\t%0, %1}";
8409 if (STACK_TOP_P (operands[0]))
8410 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8412 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8417 if (STACK_TOP_P (operands[0]))
8419 if (STACK_TOP_P (operands[1]))
8420 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8422 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8425 else if (STACK_TOP_P (operands[1]))
8428 p = "{\t%1, %0|r\t%0, %1}";
8430 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8436 p = "{r\t%2, %0|\t%0, %2}";
8438 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8451 /* Return needed mode for entity in optimize_mode_switching pass. */
8454 ix86_mode_needed (int entity, rtx insn)
8456 enum attr_i387_cw mode;
8458 /* The mode UNINITIALIZED is used to store control word after a
8459 function call or ASM pattern. The mode ANY specify that function
8460 has no requirements on the control word and make no changes in the
8461 bits we are interested in. */
8464 || (NONJUMP_INSN_P (insn)
8465 && (asm_noperands (PATTERN (insn)) >= 0
8466 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8467 return I387_CW_UNINITIALIZED;
8469 if (recog_memoized (insn) < 0)
8472 mode = get_attr_i387_cw (insn);
8477 if (mode == I387_CW_TRUNC)
8482 if (mode == I387_CW_FLOOR)
8487 if (mode == I387_CW_CEIL)
8492 if (mode == I387_CW_MASK_PM)
8503 /* Output code to initialize control word copies used by trunc?f?i and
8504 rounding patterns. CURRENT_MODE is set to current control word,
8505 while NEW_MODE is set to new control word. */
8508 emit_i387_cw_initialization (int mode)
8510 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8515 rtx reg = gen_reg_rtx (HImode);
8517 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8518 emit_move_insn (reg, stored_mode);
8520 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8525 /* round toward zero (truncate) */
8526 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8527 slot = SLOT_CW_TRUNC;
8531 /* round down toward -oo */
8532 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8533 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8534 slot = SLOT_CW_FLOOR;
8538 /* round up toward +oo */
8539 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8540 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8541 slot = SLOT_CW_CEIL;
8544 case I387_CW_MASK_PM:
8545 /* mask precision exception for nearbyint() */
8546 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8547 slot = SLOT_CW_MASK_PM;
8559 /* round toward zero (truncate) */
8560 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8561 slot = SLOT_CW_TRUNC;
8565 /* round down toward -oo */
8566 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8567 slot = SLOT_CW_FLOOR;
8571 /* round up toward +oo */
8572 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8573 slot = SLOT_CW_CEIL;
8576 case I387_CW_MASK_PM:
8577 /* mask precision exception for nearbyint() */
8578 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8579 slot = SLOT_CW_MASK_PM;
8587 gcc_assert (slot < MAX_386_STACK_LOCALS);
8589 new_mode = assign_386_stack_local (HImode, slot);
8590 emit_move_insn (new_mode, reg);
8593 /* Output code for INSN to convert a float to a signed int. OPERANDS
8594 are the insn operands. The output may be [HSD]Imode and the input
8595 operand may be [SDX]Fmode. */
8598 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8600 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8601 int dimode_p = GET_MODE (operands[0]) == DImode;
8602 int round_mode = get_attr_i387_cw (insn);
8604 /* Jump through a hoop or two for DImode, since the hardware has no
8605 non-popping instruction. We used to do this a different way, but
8606 that was somewhat fragile and broke with post-reload splitters. */
8607 if ((dimode_p || fisttp) && !stack_top_dies)
8608 output_asm_insn ("fld\t%y1", operands);
8610 gcc_assert (STACK_TOP_P (operands[1]));
8611 gcc_assert (GET_CODE (operands[0]) == MEM);
8614 output_asm_insn ("fisttp%z0\t%0", operands);
8617 if (round_mode != I387_CW_ANY)
8618 output_asm_insn ("fldcw\t%3", operands);
8619 if (stack_top_dies || dimode_p)
8620 output_asm_insn ("fistp%z0\t%0", operands);
8622 output_asm_insn ("fist%z0\t%0", operands);
8623 if (round_mode != I387_CW_ANY)
8624 output_asm_insn ("fldcw\t%2", operands);
8630 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8631 have the values zero or one, indicates the ffreep insn's operand
8632 from the OPERANDS array. */
8635 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8637 if (TARGET_USE_FFREEP)
8638 #if HAVE_AS_IX86_FFREEP
8639 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8641 switch (REGNO (operands[opno]))
8643 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
8644 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
8645 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
8646 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
8647 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
8648 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
8649 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
8650 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
8654 return opno ? "fstp\t%y1" : "fstp\t%y0";
8658 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8659 should be used. UNORDERED_P is true when fucom should be used. */
8662 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8665 rtx cmp_op0, cmp_op1;
8666 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8670 cmp_op0 = operands[0];
8671 cmp_op1 = operands[1];
8675 cmp_op0 = operands[1];
8676 cmp_op1 = operands[2];
8681 if (GET_MODE (operands[0]) == SFmode)
8683 return "ucomiss\t{%1, %0|%0, %1}";
8685 return "comiss\t{%1, %0|%0, %1}";
8688 return "ucomisd\t{%1, %0|%0, %1}";
8690 return "comisd\t{%1, %0|%0, %1}";
8693 gcc_assert (STACK_TOP_P (cmp_op0));
8695 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8697 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8701 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8702 return output_387_ffreep (operands, 1);
8705 return "ftst\n\tfnstsw\t%0";
8708 if (STACK_REG_P (cmp_op1)
8710 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8711 && REGNO (cmp_op1) != FIRST_STACK_REG)
8713 /* If both the top of the 387 stack dies, and the other operand
8714 is also a stack register that dies, then this must be a
8715 `fcompp' float compare */
8719 /* There is no double popping fcomi variant. Fortunately,
8720 eflags is immune from the fstp's cc clobbering. */
8722 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8724 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8725 return output_387_ffreep (operands, 0);
8730 return "fucompp\n\tfnstsw\t%0";
8732 return "fcompp\n\tfnstsw\t%0";
8737 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8739 static const char * const alt[16] =
8741 "fcom%z2\t%y2\n\tfnstsw\t%0",
8742 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8743 "fucom%z2\t%y2\n\tfnstsw\t%0",
8744 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8746 "ficom%z2\t%y2\n\tfnstsw\t%0",
8747 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8751 "fcomi\t{%y1, %0|%0, %y1}",
8752 "fcomip\t{%y1, %0|%0, %y1}",
8753 "fucomi\t{%y1, %0|%0, %y1}",
8754 "fucomip\t{%y1, %0|%0, %y1}",
8765 mask = eflags_p << 3;
8766 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8767 mask |= unordered_p << 1;
8768 mask |= stack_top_dies;
8770 gcc_assert (mask < 16);
8779 ix86_output_addr_vec_elt (FILE *file, int value)
8781 const char *directive = ASM_LONG;
8785 directive = ASM_QUAD;
8787 gcc_assert (!TARGET_64BIT);
8790 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8794 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8797 fprintf (file, "%s%s%d-%s%d\n",
8798 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8799 else if (HAVE_AS_GOTOFF_IN_DATA)
8800 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8802 else if (TARGET_MACHO)
8804 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8805 machopic_output_function_base_name (file);
8806 fprintf(file, "\n");
8810 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8811 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8814 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8818 ix86_expand_clear (rtx dest)
8822 /* We play register width games, which are only valid after reload. */
8823 gcc_assert (reload_completed);
8825 /* Avoid HImode and its attendant prefix byte. */
8826 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8827 dest = gen_rtx_REG (SImode, REGNO (dest));
8829 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8831 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8832 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8834 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8835 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8841 /* X is an unchanging MEM. If it is a constant pool reference, return
8842 the constant pool rtx, else NULL. */
8845 maybe_get_pool_constant (rtx x)
8847 x = ix86_delegitimize_address (XEXP (x, 0));
8849 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8850 return get_pool_constant (x);
8856 ix86_expand_move (enum machine_mode mode, rtx operands[])
8858 int strict = (reload_in_progress || reload_completed);
8860 enum tls_model model;
8865 if (GET_CODE (op1) == SYMBOL_REF)
8867 model = SYMBOL_REF_TLS_MODEL (op1);
8870 op1 = legitimize_tls_address (op1, model, true);
8871 op1 = force_operand (op1, op0);
8876 else if (GET_CODE (op1) == CONST
8877 && GET_CODE (XEXP (op1, 0)) == PLUS
8878 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8880 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8883 rtx addend = XEXP (XEXP (op1, 0), 1);
8884 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8885 op1 = force_operand (op1, NULL);
8886 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8887 op0, 1, OPTAB_DIRECT);
8893 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8895 if (TARGET_MACHO && !TARGET_64BIT)
8900 rtx temp = ((reload_in_progress
8901 || ((op0 && GET_CODE (op0) == REG)
8903 ? op0 : gen_reg_rtx (Pmode));
8904 op1 = machopic_indirect_data_reference (op1, temp);
8905 op1 = machopic_legitimize_pic_address (op1, mode,
8906 temp == op1 ? 0 : temp);
8908 else if (MACHOPIC_INDIRECT)
8909 op1 = machopic_indirect_data_reference (op1, 0);
8916 if (GET_CODE (op0) == MEM)
8917 op1 = force_reg (Pmode, op1);
8919 op1 = legitimize_address (op1, op1, Pmode);
8924 if (GET_CODE (op0) == MEM
8925 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8926 || !push_operand (op0, mode))
8927 && GET_CODE (op1) == MEM)
8928 op1 = force_reg (mode, op1);
8930 if (push_operand (op0, mode)
8931 && ! general_no_elim_operand (op1, mode))
8932 op1 = copy_to_mode_reg (mode, op1);
8934 /* Force large constants in 64bit compilation into register
8935 to get them CSEed. */
8936 if (TARGET_64BIT && mode == DImode
8937 && immediate_operand (op1, mode)
8938 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8939 && !register_operand (op0, mode)
8940 && optimize && !reload_completed && !reload_in_progress)
8941 op1 = copy_to_mode_reg (mode, op1);
8943 if (FLOAT_MODE_P (mode))
8945 /* If we are loading a floating point constant to a register,
8946 force the value to memory now, since we'll get better code
8947 out the back end. */
8951 else if (GET_CODE (op1) == CONST_DOUBLE)
8953 op1 = validize_mem (force_const_mem (mode, op1));
8954 if (!register_operand (op0, mode))
8956 rtx temp = gen_reg_rtx (mode);
8957 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8958 emit_move_insn (op0, temp);
8965 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8969 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8971 rtx op0 = operands[0], op1 = operands[1];
8973 /* Force constants other than zero into memory. We do not know how
8974 the instructions used to build constants modify the upper 64 bits
8975 of the register, once we have that information we may be able
8976 to handle some of them more efficiently. */
8977 if ((reload_in_progress | reload_completed) == 0
8978 && register_operand (op0, mode)
8980 && standard_sse_constant_p (op1) <= 0)
8981 op1 = validize_mem (force_const_mem (mode, op1));
8983 /* Make operand1 a register if it isn't already. */
8985 && !register_operand (op0, mode)
8986 && !register_operand (op1, mode))
8988 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
8992 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8995 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
8996 straight to ix86_expand_vector_move. */
8999 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9008 /* If we're optimizing for size, movups is the smallest. */
9011 op0 = gen_lowpart (V4SFmode, op0);
9012 op1 = gen_lowpart (V4SFmode, op1);
9013 emit_insn (gen_sse_movups (op0, op1));
9017 /* ??? If we have typed data, then it would appear that using
9018 movdqu is the only way to get unaligned data loaded with
9020 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9022 op0 = gen_lowpart (V16QImode, op0);
9023 op1 = gen_lowpart (V16QImode, op1);
9024 emit_insn (gen_sse2_movdqu (op0, op1));
9028 if (TARGET_SSE2 && mode == V2DFmode)
9032 /* When SSE registers are split into halves, we can avoid
9033 writing to the top half twice. */
9034 if (TARGET_SSE_SPLIT_REGS)
9036 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9041 /* ??? Not sure about the best option for the Intel chips.
9042 The following would seem to satisfy; the register is
9043 entirely cleared, breaking the dependency chain. We
9044 then store to the upper half, with a dependency depth
9045 of one. A rumor has it that Intel recommends two movsd
9046 followed by an unpacklpd, but this is unconfirmed. And
9047 given that the dependency depth of the unpacklpd would
9048 still be one, I'm not sure why this would be better. */
9049 zero = CONST0_RTX (V2DFmode);
9052 m = adjust_address (op1, DFmode, 0);
9053 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9054 m = adjust_address (op1, DFmode, 8);
9055 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9059 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9060 emit_move_insn (op0, CONST0_RTX (mode));
9062 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9064 if (mode != V4SFmode)
9065 op0 = gen_lowpart (V4SFmode, op0);
9066 m = adjust_address (op1, V2SFmode, 0);
9067 emit_insn (gen_sse_loadlps (op0, op0, m));
9068 m = adjust_address (op1, V2SFmode, 8);
9069 emit_insn (gen_sse_loadhps (op0, op0, m));
9072 else if (MEM_P (op0))
9074 /* If we're optimizing for size, movups is the smallest. */
9077 op0 = gen_lowpart (V4SFmode, op0);
9078 op1 = gen_lowpart (V4SFmode, op1);
9079 emit_insn (gen_sse_movups (op0, op1));
9083 /* ??? Similar to above, only less clear because of quote
9084 typeless stores unquote. */
9085 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9086 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9088 op0 = gen_lowpart (V16QImode, op0);
9089 op1 = gen_lowpart (V16QImode, op1);
9090 emit_insn (gen_sse2_movdqu (op0, op1));
9094 if (TARGET_SSE2 && mode == V2DFmode)
9096 m = adjust_address (op0, DFmode, 0);
9097 emit_insn (gen_sse2_storelpd (m, op1));
9098 m = adjust_address (op0, DFmode, 8);
9099 emit_insn (gen_sse2_storehpd (m, op1));
9103 if (mode != V4SFmode)
9104 op1 = gen_lowpart (V4SFmode, op1);
9105 m = adjust_address (op0, V2SFmode, 0);
9106 emit_insn (gen_sse_storelps (m, op1));
9107 m = adjust_address (op0, V2SFmode, 8);
9108 emit_insn (gen_sse_storehps (m, op1));
9115 /* Expand a push in MODE. This is some mode for which we do not support
9116 proper push instructions, at least from the registers that we expect
9117 the value to live in. */
9120 ix86_expand_push (enum machine_mode mode, rtx x)
9124 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9125 GEN_INT (-GET_MODE_SIZE (mode)),
9126 stack_pointer_rtx, 1, OPTAB_DIRECT);
9127 if (tmp != stack_pointer_rtx)
9128 emit_move_insn (stack_pointer_rtx, tmp);
9130 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9131 emit_move_insn (tmp, x);
9134 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9135 destination to use for the operation. If different from the true
9136 destination in operands[0], a copy operation will be required. */
9139 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9142 int matching_memory;
9143 rtx src1, src2, dst;
9149 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9150 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9151 && (rtx_equal_p (dst, src2)
9152 || immediate_operand (src1, mode)))
9159 /* If the destination is memory, and we do not have matching source
9160 operands, do things in registers. */
9161 matching_memory = 0;
9162 if (GET_CODE (dst) == MEM)
9164 if (rtx_equal_p (dst, src1))
9165 matching_memory = 1;
9166 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9167 && rtx_equal_p (dst, src2))
9168 matching_memory = 2;
9170 dst = gen_reg_rtx (mode);
9173 /* Both source operands cannot be in memory. */
9174 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9176 if (matching_memory != 2)
9177 src2 = force_reg (mode, src2);
9179 src1 = force_reg (mode, src1);
9182 /* If the operation is not commutable, source 1 cannot be a constant
9183 or non-matching memory. */
9184 if ((CONSTANT_P (src1)
9185 || (!matching_memory && GET_CODE (src1) == MEM))
9186 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9187 src1 = force_reg (mode, src1);
9189 src1 = operands[1] = src1;
9190 src2 = operands[2] = src2;
9194 /* Similarly, but assume that the destination has already been
9198 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9199 enum machine_mode mode, rtx operands[])
9201 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9202 gcc_assert (dst == operands[0]);
9205 /* Attempt to expand a binary operator. Make the expansion closer to the
9206 actual machine, then just general_operand, which will allow 3 separate
9207 memory references (one output, two input) in a single insn. */
9210 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9213 rtx src1, src2, dst, op, clob;
9215 dst = ix86_fixup_binary_operands (code, mode, operands);
9219 /* Emit the instruction. */
9221 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9222 if (reload_in_progress)
9224 /* Reload doesn't know about the flags register, and doesn't know that
9225 it doesn't want to clobber it. We can only do this with PLUS. */
9226 gcc_assert (code == PLUS);
9231 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9232 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9235 /* Fix up the destination if needed. */
9236 if (dst != operands[0])
9237 emit_move_insn (operands[0], dst);
9240 /* Return TRUE or FALSE depending on whether the binary operator meets the
9241 appropriate constraints. */
9244 ix86_binary_operator_ok (enum rtx_code code,
9245 enum machine_mode mode ATTRIBUTE_UNUSED,
9248 /* Both source operands cannot be in memory. */
9249 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9251 /* If the operation is not commutable, source 1 cannot be a constant. */
9252 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9254 /* If the destination is memory, we must have a matching source operand. */
9255 if (GET_CODE (operands[0]) == MEM
9256 && ! (rtx_equal_p (operands[0], operands[1])
9257 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9258 && rtx_equal_p (operands[0], operands[2]))))
9260 /* If the operation is not commutable and the source 1 is memory, we must
9261 have a matching destination. */
9262 if (GET_CODE (operands[1]) == MEM
9263 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9264 && ! rtx_equal_p (operands[0], operands[1]))
9269 /* Attempt to expand a unary operator. Make the expansion closer to the
9270 actual machine, then just general_operand, which will allow 2 separate
9271 memory references (one output, one input) in a single insn. */
9274 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9277 int matching_memory;
9278 rtx src, dst, op, clob;
9283 /* If the destination is memory, and we do not have matching source
9284 operands, do things in registers. */
9285 matching_memory = 0;
9288 if (rtx_equal_p (dst, src))
9289 matching_memory = 1;
9291 dst = gen_reg_rtx (mode);
9294 /* When source operand is memory, destination must match. */
9295 if (MEM_P (src) && !matching_memory)
9296 src = force_reg (mode, src);
9298 /* Emit the instruction. */
9300 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9301 if (reload_in_progress || code == NOT)
9303 /* Reload doesn't know about the flags register, and doesn't know that
9304 it doesn't want to clobber it. */
9305 gcc_assert (code == NOT);
9310 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9311 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9314 /* Fix up the destination if needed. */
9315 if (dst != operands[0])
9316 emit_move_insn (operands[0], dst);
9319 /* Return TRUE or FALSE depending on whether the unary operator meets the
9320 appropriate constraints. */
9323 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9324 enum machine_mode mode ATTRIBUTE_UNUSED,
9325 rtx operands[2] ATTRIBUTE_UNUSED)
9327 /* If one of operands is memory, source and destination must match. */
9328 if ((GET_CODE (operands[0]) == MEM
9329 || GET_CODE (operands[1]) == MEM)
9330 && ! rtx_equal_p (operands[0], operands[1]))
9335 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9336 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9337 true, then replicate the mask for all elements of the vector register.
9338 If INVERT is true, then create a mask excluding the sign bit. */
9341 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9343 enum machine_mode vec_mode;
9344 HOST_WIDE_INT hi, lo;
9349 /* Find the sign bit, sign extended to 2*HWI. */
9351 lo = 0x80000000, hi = lo < 0;
9352 else if (HOST_BITS_PER_WIDE_INT >= 64)
9353 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9355 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9360 /* Force this value into the low part of a fp vector constant. */
9361 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9362 mask = gen_lowpart (mode, mask);
9367 v = gen_rtvec (4, mask, mask, mask, mask);
9369 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9370 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9371 vec_mode = V4SFmode;
9376 v = gen_rtvec (2, mask, mask);
9378 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9379 vec_mode = V2DFmode;
9382 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9385 /* Generate code for floating point ABS or NEG. */
9388 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9391 rtx mask, set, use, clob, dst, src;
9392 bool matching_memory;
9393 bool use_sse = false;
9394 bool vector_mode = VECTOR_MODE_P (mode);
9395 enum machine_mode elt_mode = mode;
9399 elt_mode = GET_MODE_INNER (mode);
9402 else if (TARGET_SSE_MATH)
9403 use_sse = SSE_FLOAT_MODE_P (mode);
9405 /* NEG and ABS performed with SSE use bitwise mask operations.
9406 Create the appropriate mask now. */
9408 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9415 /* If the destination is memory, and we don't have matching source
9416 operands or we're using the x87, do things in registers. */
9417 matching_memory = false;
9420 if (use_sse && rtx_equal_p (dst, src))
9421 matching_memory = true;
9423 dst = gen_reg_rtx (mode);
9425 if (MEM_P (src) && !matching_memory)
9426 src = force_reg (mode, src);
9430 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9431 set = gen_rtx_SET (VOIDmode, dst, set);
9436 set = gen_rtx_fmt_e (code, mode, src);
9437 set = gen_rtx_SET (VOIDmode, dst, set);
9440 use = gen_rtx_USE (VOIDmode, mask);
9441 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9442 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9443 gen_rtvec (3, set, use, clob)));
9449 if (dst != operands[0])
9450 emit_move_insn (operands[0], dst);
9453 /* Expand a copysign operation. Special case operand 0 being a constant. */
9456 ix86_expand_copysign (rtx operands[])
9458 enum machine_mode mode, vmode;
9459 rtx dest, op0, op1, mask, nmask;
9465 mode = GET_MODE (dest);
9466 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9468 if (GET_CODE (op0) == CONST_DOUBLE)
9472 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9473 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9475 if (op0 == CONST0_RTX (mode))
9476 op0 = CONST0_RTX (vmode);
9480 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9481 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9483 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9484 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9487 mask = ix86_build_signbit_mask (mode, 0, 0);
9490 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9492 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9496 nmask = ix86_build_signbit_mask (mode, 0, 1);
9497 mask = ix86_build_signbit_mask (mode, 0, 0);
9500 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9502 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9506 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9507 be a constant, and so has already been expanded into a vector constant. */
9510 ix86_split_copysign_const (rtx operands[])
9512 enum machine_mode mode, vmode;
9513 rtx dest, op0, op1, mask, x;
9520 mode = GET_MODE (dest);
9521 vmode = GET_MODE (mask);
9523 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9524 x = gen_rtx_AND (vmode, dest, mask);
9525 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9527 if (op0 != CONST0_RTX (vmode))
9529 x = gen_rtx_IOR (vmode, dest, op0);
9530 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9534 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9535 so we have to do two masks. */
9538 ix86_split_copysign_var (rtx operands[])
9540 enum machine_mode mode, vmode;
9541 rtx dest, scratch, op0, op1, mask, nmask, x;
9544 scratch = operands[1];
9547 nmask = operands[4];
9550 mode = GET_MODE (dest);
9551 vmode = GET_MODE (mask);
9553 if (rtx_equal_p (op0, op1))
9555 /* Shouldn't happen often (it's useless, obviously), but when it does
9556 we'd generate incorrect code if we continue below. */
9557 emit_move_insn (dest, op0);
9561 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9563 gcc_assert (REGNO (op1) == REGNO (scratch));
9565 x = gen_rtx_AND (vmode, scratch, mask);
9566 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9569 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9570 x = gen_rtx_NOT (vmode, dest);
9571 x = gen_rtx_AND (vmode, x, op0);
9572 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9576 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9578 x = gen_rtx_AND (vmode, scratch, mask);
9580 else /* alternative 2,4 */
9582 gcc_assert (REGNO (mask) == REGNO (scratch));
9583 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9584 x = gen_rtx_AND (vmode, scratch, op1);
9586 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9588 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9590 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9591 x = gen_rtx_AND (vmode, dest, nmask);
9593 else /* alternative 3,4 */
9595 gcc_assert (REGNO (nmask) == REGNO (dest));
9597 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9598 x = gen_rtx_AND (vmode, dest, op0);
9600 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9603 x = gen_rtx_IOR (vmode, dest, scratch);
9604 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9607 /* Return TRUE or FALSE depending on whether the first SET in INSN
9608 has source and destination with matching CC modes, and that the
9609 CC mode is at least as constrained as REQ_MODE. */
9612 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9615 enum machine_mode set_mode;
9617 set = PATTERN (insn);
9618 if (GET_CODE (set) == PARALLEL)
9619 set = XVECEXP (set, 0, 0);
9620 gcc_assert (GET_CODE (set) == SET);
9621 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9623 set_mode = GET_MODE (SET_DEST (set));
9627 if (req_mode != CCNOmode
9628 && (req_mode != CCmode
9629 || XEXP (SET_SRC (set), 1) != const0_rtx))
9633 if (req_mode == CCGCmode)
9637 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9641 if (req_mode == CCZmode)
9651 return (GET_MODE (SET_SRC (set)) == set_mode);
9654 /* Generate insn patterns to do an integer compare of OPERANDS. */
9657 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9659 enum machine_mode cmpmode;
9662 cmpmode = SELECT_CC_MODE (code, op0, op1);
9663 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9665 /* This is very simple, but making the interface the same as in the
9666 FP case makes the rest of the code easier. */
9667 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9668 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9670 /* Return the test that should be put into the flags user, i.e.
9671 the bcc, scc, or cmov instruction. */
9672 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9675 /* Figure out whether to use ordered or unordered fp comparisons.
9676 Return the appropriate mode to use. */
9679 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9681 /* ??? In order to make all comparisons reversible, we do all comparisons
9682 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9683 all forms trapping and nontrapping comparisons, we can make inequality
9684 comparisons trapping again, since it results in better code when using
9685 FCOM based compares. */
9686 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9690 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9692 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9693 return ix86_fp_compare_mode (code);
9696 /* Only zero flag is needed. */
9698 case NE: /* ZF!=0 */
9700 /* Codes needing carry flag. */
9701 case GEU: /* CF=0 */
9702 case GTU: /* CF=0 & ZF=0 */
9703 case LTU: /* CF=1 */
9704 case LEU: /* CF=1 | ZF=1 */
9706 /* Codes possibly doable only with sign flag when
9707 comparing against zero. */
9708 case GE: /* SF=OF or SF=0 */
9709 case LT: /* SF<>OF or SF=1 */
9710 if (op1 == const0_rtx)
9713 /* For other cases Carry flag is not required. */
9715 /* Codes doable only with sign flag when comparing
9716 against zero, but we miss jump instruction for it
9717 so we need to use relational tests against overflow
9718 that thus needs to be zero. */
9719 case GT: /* ZF=0 & SF=OF */
9720 case LE: /* ZF=1 | SF<>OF */
9721 if (op1 == const0_rtx)
9725 /* strcmp pattern do (use flags) and combine may ask us for proper
9734 /* Return the fixed registers used for condition codes. */
9737 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9744 /* If two condition code modes are compatible, return a condition code
9745 mode which is compatible with both. Otherwise, return
9748 static enum machine_mode
9749 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9754 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9757 if ((m1 == CCGCmode && m2 == CCGOCmode)
9758 || (m1 == CCGOCmode && m2 == CCGCmode))
9786 /* These are only compatible with themselves, which we already
9792 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9795 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9797 enum rtx_code swapped_code = swap_condition (code);
9798 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9799 || (ix86_fp_comparison_cost (swapped_code)
9800 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9803 /* Swap, force into registers, or otherwise massage the two operands
9804 to a fp comparison. The operands are updated in place; the new
9805 comparison code is returned. */
9807 static enum rtx_code
9808 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9810 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9811 rtx op0 = *pop0, op1 = *pop1;
9812 enum machine_mode op_mode = GET_MODE (op0);
9813 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9815 /* All of the unordered compare instructions only work on registers.
9816 The same is true of the fcomi compare instructions. The XFmode
9817 compare instructions require registers except when comparing
9818 against zero or when converting operand 1 from fixed point to
9822 && (fpcmp_mode == CCFPUmode
9823 || (op_mode == XFmode
9824 && ! (standard_80387_constant_p (op0) == 1
9825 || standard_80387_constant_p (op1) == 1)
9826 && GET_CODE (op1) != FLOAT)
9827 || ix86_use_fcomi_compare (code)))
9829 op0 = force_reg (op_mode, op0);
9830 op1 = force_reg (op_mode, op1);
9834 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9835 things around if they appear profitable, otherwise force op0
9838 if (standard_80387_constant_p (op0) == 0
9839 || (GET_CODE (op0) == MEM
9840 && ! (standard_80387_constant_p (op1) == 0
9841 || GET_CODE (op1) == MEM)))
9844 tmp = op0, op0 = op1, op1 = tmp;
9845 code = swap_condition (code);
9848 if (GET_CODE (op0) != REG)
9849 op0 = force_reg (op_mode, op0);
9851 if (CONSTANT_P (op1))
9853 int tmp = standard_80387_constant_p (op1);
9855 op1 = validize_mem (force_const_mem (op_mode, op1));
9859 op1 = force_reg (op_mode, op1);
9862 op1 = force_reg (op_mode, op1);
9866 /* Try to rearrange the comparison to make it cheaper. */
9867 if (ix86_fp_comparison_cost (code)
9868 > ix86_fp_comparison_cost (swap_condition (code))
9869 && (GET_CODE (op1) == REG || !no_new_pseudos))
9872 tmp = op0, op0 = op1, op1 = tmp;
9873 code = swap_condition (code);
9874 if (GET_CODE (op0) != REG)
9875 op0 = force_reg (op_mode, op0);
9883 /* Convert comparison codes we use to represent FP comparison to integer
9884 code that will result in proper branch. Return UNKNOWN if no such code
9888 ix86_fp_compare_code_to_integer (enum rtx_code code)
9917 /* Split comparison code CODE into comparisons we can do using branch
9918 instructions. BYPASS_CODE is comparison code for branch that will
9919 branch around FIRST_CODE and SECOND_CODE. If some of branches
9920 is not required, set value to UNKNOWN.
9921 We never require more than two branches. */
9924 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9925 enum rtx_code *first_code,
9926 enum rtx_code *second_code)
9929 *bypass_code = UNKNOWN;
9930 *second_code = UNKNOWN;
9932 /* The fcomi comparison sets flags as follows:
9942 case GT: /* GTU - CF=0 & ZF=0 */
9943 case GE: /* GEU - CF=0 */
9944 case ORDERED: /* PF=0 */
9945 case UNORDERED: /* PF=1 */
9946 case UNEQ: /* EQ - ZF=1 */
9947 case UNLT: /* LTU - CF=1 */
9948 case UNLE: /* LEU - CF=1 | ZF=1 */
9949 case LTGT: /* EQ - ZF=0 */
9951 case LT: /* LTU - CF=1 - fails on unordered */
9953 *bypass_code = UNORDERED;
9955 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9957 *bypass_code = UNORDERED;
9959 case EQ: /* EQ - ZF=1 - fails on unordered */
9961 *bypass_code = UNORDERED;
9963 case NE: /* NE - ZF=0 - fails on unordered */
9965 *second_code = UNORDERED;
9967 case UNGE: /* GEU - CF=0 - fails on unordered */
9969 *second_code = UNORDERED;
9971 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9973 *second_code = UNORDERED;
9978 if (!TARGET_IEEE_FP)
9980 *second_code = UNKNOWN;
9981 *bypass_code = UNKNOWN;
9985 /* Return cost of comparison done fcom + arithmetics operations on AX.
9986 All following functions do use number of instructions as a cost metrics.
9987 In future this should be tweaked to compute bytes for optimize_size and
9988 take into account performance of various instructions on various CPUs. */
9990 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9992 if (!TARGET_IEEE_FP)
9994 /* The cost of code output by ix86_expand_fp_compare. */
10018 gcc_unreachable ();
10022 /* Return cost of comparison done using fcomi operation.
10023 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10025 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10027 enum rtx_code bypass_code, first_code, second_code;
10028 /* Return arbitrarily high cost when instruction is not supported - this
10029 prevents gcc from using it. */
10032 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10033 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10036 /* Return cost of comparison done using sahf operation.
10037 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10039 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10041 enum rtx_code bypass_code, first_code, second_code;
10042 /* Return arbitrarily high cost when instruction is not preferred - this
10043 avoids gcc from using it. */
10044 if (!TARGET_USE_SAHF && !optimize_size)
10046 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10047 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10050 /* Compute cost of the comparison done using any method.
10051 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10053 ix86_fp_comparison_cost (enum rtx_code code)
10055 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10058 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10059 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10061 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10062 if (min > sahf_cost)
10064 if (min > fcomi_cost)
10069 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10072 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10073 rtx *second_test, rtx *bypass_test)
10075 enum machine_mode fpcmp_mode, intcmp_mode;
10077 int cost = ix86_fp_comparison_cost (code);
10078 enum rtx_code bypass_code, first_code, second_code;
10080 fpcmp_mode = ix86_fp_compare_mode (code);
10081 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10084 *second_test = NULL_RTX;
10086 *bypass_test = NULL_RTX;
10088 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10090 /* Do fcomi/sahf based test when profitable. */
10091 if ((bypass_code == UNKNOWN || bypass_test)
10092 && (second_code == UNKNOWN || second_test)
10093 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10097 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10098 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10104 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10105 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10107 scratch = gen_reg_rtx (HImode);
10108 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10109 emit_insn (gen_x86_sahf_1 (scratch));
10112 /* The FP codes work out to act like unsigned. */
10113 intcmp_mode = fpcmp_mode;
10115 if (bypass_code != UNKNOWN)
10116 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10117 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10119 if (second_code != UNKNOWN)
10120 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10121 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10126 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10127 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10128 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10130 scratch = gen_reg_rtx (HImode);
10131 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10133 /* In the unordered case, we have to check C2 for NaN's, which
10134 doesn't happen to work out to anything nice combination-wise.
10135 So do some bit twiddling on the value we've got in AH to come
10136 up with an appropriate set of condition codes. */
10138 intcmp_mode = CCNOmode;
10143 if (code == GT || !TARGET_IEEE_FP)
10145 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10150 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10151 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10152 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10153 intcmp_mode = CCmode;
10159 if (code == LT && TARGET_IEEE_FP)
10161 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10162 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10163 intcmp_mode = CCmode;
10168 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10174 if (code == GE || !TARGET_IEEE_FP)
10176 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10181 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10182 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10189 if (code == LE && TARGET_IEEE_FP)
10191 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10192 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10193 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10194 intcmp_mode = CCmode;
10199 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10205 if (code == EQ && TARGET_IEEE_FP)
10207 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10208 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10209 intcmp_mode = CCmode;
10214 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10221 if (code == NE && TARGET_IEEE_FP)
10223 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10224 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10230 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10236 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10240 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10245 gcc_unreachable ();
10249 /* Return the test that should be put into the flags user, i.e.
10250 the bcc, scc, or cmov instruction. */
10251 return gen_rtx_fmt_ee (code, VOIDmode,
10252 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10257 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10260 op0 = ix86_compare_op0;
10261 op1 = ix86_compare_op1;
10264 *second_test = NULL_RTX;
10266 *bypass_test = NULL_RTX;
10268 if (ix86_compare_emitted)
10270 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10271 ix86_compare_emitted = NULL_RTX;
10273 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10274 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10275 second_test, bypass_test);
10277 ret = ix86_expand_int_compare (code, op0, op1);
10282 /* Return true if the CODE will result in nontrivial jump sequence. */
10284 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10286 enum rtx_code bypass_code, first_code, second_code;
10289 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10290 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10294 ix86_expand_branch (enum rtx_code code, rtx label)
10298 /* If we have emitted a compare insn, go straight to simple.
10299 ix86_expand_compare won't emit anything if ix86_compare_emitted
10301 if (ix86_compare_emitted)
10304 switch (GET_MODE (ix86_compare_op0))
10310 tmp = ix86_expand_compare (code, NULL, NULL);
10311 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10312 gen_rtx_LABEL_REF (VOIDmode, label),
10314 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10323 enum rtx_code bypass_code, first_code, second_code;
10325 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10326 &ix86_compare_op1);
10328 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10330 /* Check whether we will use the natural sequence with one jump. If
10331 so, we can expand jump early. Otherwise delay expansion by
10332 creating compound insn to not confuse optimizers. */
10333 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10336 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10337 gen_rtx_LABEL_REF (VOIDmode, label),
10338 pc_rtx, NULL_RTX, NULL_RTX);
10342 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10343 ix86_compare_op0, ix86_compare_op1);
10344 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10345 gen_rtx_LABEL_REF (VOIDmode, label),
10347 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10349 use_fcomi = ix86_use_fcomi_compare (code);
10350 vec = rtvec_alloc (3 + !use_fcomi);
10351 RTVEC_ELT (vec, 0) = tmp;
10353 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10355 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10358 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10360 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10369 /* Expand DImode branch into multiple compare+branch. */
10371 rtx lo[2], hi[2], label2;
10372 enum rtx_code code1, code2, code3;
10373 enum machine_mode submode;
10375 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10377 tmp = ix86_compare_op0;
10378 ix86_compare_op0 = ix86_compare_op1;
10379 ix86_compare_op1 = tmp;
10380 code = swap_condition (code);
10382 if (GET_MODE (ix86_compare_op0) == DImode)
10384 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10385 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10390 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10391 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10395 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10396 avoid two branches. This costs one extra insn, so disable when
10397 optimizing for size. */
10399 if ((code == EQ || code == NE)
10401 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10406 if (hi[1] != const0_rtx)
10407 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10408 NULL_RTX, 0, OPTAB_WIDEN);
10411 if (lo[1] != const0_rtx)
10412 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10413 NULL_RTX, 0, OPTAB_WIDEN);
10415 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10416 NULL_RTX, 0, OPTAB_WIDEN);
10418 ix86_compare_op0 = tmp;
10419 ix86_compare_op1 = const0_rtx;
10420 ix86_expand_branch (code, label);
10424 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10425 op1 is a constant and the low word is zero, then we can just
10426 examine the high word. */
10428 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10431 case LT: case LTU: case GE: case GEU:
10432 ix86_compare_op0 = hi[0];
10433 ix86_compare_op1 = hi[1];
10434 ix86_expand_branch (code, label);
10440 /* Otherwise, we need two or three jumps. */
10442 label2 = gen_label_rtx ();
10445 code2 = swap_condition (code);
10446 code3 = unsigned_condition (code);
10450 case LT: case GT: case LTU: case GTU:
10453 case LE: code1 = LT; code2 = GT; break;
10454 case GE: code1 = GT; code2 = LT; break;
10455 case LEU: code1 = LTU; code2 = GTU; break;
10456 case GEU: code1 = GTU; code2 = LTU; break;
10458 case EQ: code1 = UNKNOWN; code2 = NE; break;
10459 case NE: code2 = UNKNOWN; break;
10462 gcc_unreachable ();
10467 * if (hi(a) < hi(b)) goto true;
10468 * if (hi(a) > hi(b)) goto false;
10469 * if (lo(a) < lo(b)) goto true;
10473 ix86_compare_op0 = hi[0];
10474 ix86_compare_op1 = hi[1];
10476 if (code1 != UNKNOWN)
10477 ix86_expand_branch (code1, label);
10478 if (code2 != UNKNOWN)
10479 ix86_expand_branch (code2, label2);
10481 ix86_compare_op0 = lo[0];
10482 ix86_compare_op1 = lo[1];
10483 ix86_expand_branch (code3, label);
10485 if (code2 != UNKNOWN)
10486 emit_label (label2);
10491 gcc_unreachable ();
10495 /* Split branch based on floating point condition. */
10497 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10498 rtx target1, rtx target2, rtx tmp, rtx pushed)
10500 rtx second, bypass;
10501 rtx label = NULL_RTX;
10503 int bypass_probability = -1, second_probability = -1, probability = -1;
10506 if (target2 != pc_rtx)
10509 code = reverse_condition_maybe_unordered (code);
10514 condition = ix86_expand_fp_compare (code, op1, op2,
10515 tmp, &second, &bypass);
10517 /* Remove pushed operand from stack. */
10519 ix86_free_from_memory (GET_MODE (pushed));
10521 if (split_branch_probability >= 0)
10523 /* Distribute the probabilities across the jumps.
10524 Assume the BYPASS and SECOND to be always test
10526 probability = split_branch_probability;
10528 /* Value of 1 is low enough to make no need for probability
10529 to be updated. Later we may run some experiments and see
10530 if unordered values are more frequent in practice. */
10532 bypass_probability = 1;
10534 second_probability = 1;
10536 if (bypass != NULL_RTX)
10538 label = gen_label_rtx ();
10539 i = emit_jump_insn (gen_rtx_SET
10541 gen_rtx_IF_THEN_ELSE (VOIDmode,
10543 gen_rtx_LABEL_REF (VOIDmode,
10546 if (bypass_probability >= 0)
10548 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10549 GEN_INT (bypass_probability),
10552 i = emit_jump_insn (gen_rtx_SET
10554 gen_rtx_IF_THEN_ELSE (VOIDmode,
10555 condition, target1, target2)));
10556 if (probability >= 0)
10558 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10559 GEN_INT (probability),
10561 if (second != NULL_RTX)
10563 i = emit_jump_insn (gen_rtx_SET
10565 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10567 if (second_probability >= 0)
10569 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10570 GEN_INT (second_probability),
10573 if (label != NULL_RTX)
10574 emit_label (label);
10578 ix86_expand_setcc (enum rtx_code code, rtx dest)
10580 rtx ret, tmp, tmpreg, equiv;
10581 rtx second_test, bypass_test;
10583 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10584 return 0; /* FAIL */
10586 gcc_assert (GET_MODE (dest) == QImode);
10588 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10589 PUT_MODE (ret, QImode);
10594 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10595 if (bypass_test || second_test)
10597 rtx test = second_test;
10599 rtx tmp2 = gen_reg_rtx (QImode);
10602 gcc_assert (!second_test);
10603 test = bypass_test;
10605 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10607 PUT_MODE (test, QImode);
10608 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10611 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10613 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10616 /* Attach a REG_EQUAL note describing the comparison result. */
10617 if (ix86_compare_op0 && ix86_compare_op1)
10619 equiv = simplify_gen_relational (code, QImode,
10620 GET_MODE (ix86_compare_op0),
10621 ix86_compare_op0, ix86_compare_op1);
10622 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10625 return 1; /* DONE */
10628 /* Expand comparison setting or clearing carry flag. Return true when
10629 successful and set pop for the operation. */
10631 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10633 enum machine_mode mode =
10634 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10636 /* Do not handle DImode compares that go through special path. Also we can't
10637 deal with FP compares yet. This is possible to add. */
10638 if (mode == (TARGET_64BIT ? TImode : DImode))
10640 if (FLOAT_MODE_P (mode))
10642 rtx second_test = NULL, bypass_test = NULL;
10643 rtx compare_op, compare_seq;
10645 /* Shortcut: following common codes never translate into carry flag compares. */
10646 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10647 || code == ORDERED || code == UNORDERED)
10650 /* These comparisons require zero flag; swap operands so they won't. */
10651 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10652 && !TARGET_IEEE_FP)
10657 code = swap_condition (code);
10660 /* Try to expand the comparison and verify that we end up with carry flag
10661 based comparison. This is fails to be true only when we decide to expand
10662 comparison using arithmetic that is not too common scenario. */
10664 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10665 &second_test, &bypass_test);
10666 compare_seq = get_insns ();
10669 if (second_test || bypass_test)
10671 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10672 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10673 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10675 code = GET_CODE (compare_op);
10676 if (code != LTU && code != GEU)
10678 emit_insn (compare_seq);
10682 if (!INTEGRAL_MODE_P (mode))
10690 /* Convert a==0 into (unsigned)a<1. */
10693 if (op1 != const0_rtx)
10696 code = (code == EQ ? LTU : GEU);
10699 /* Convert a>b into b<a or a>=b-1. */
10702 if (GET_CODE (op1) == CONST_INT)
10704 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10705 /* Bail out on overflow. We still can swap operands but that
10706 would force loading of the constant into register. */
10707 if (op1 == const0_rtx
10708 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10710 code = (code == GTU ? GEU : LTU);
10717 code = (code == GTU ? LTU : GEU);
10721 /* Convert a>=0 into (unsigned)a<0x80000000. */
10724 if (mode == DImode || op1 != const0_rtx)
10726 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10727 code = (code == LT ? GEU : LTU);
10731 if (mode == DImode || op1 != constm1_rtx)
10733 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10734 code = (code == LE ? GEU : LTU);
10740 /* Swapping operands may cause constant to appear as first operand. */
10741 if (!nonimmediate_operand (op0, VOIDmode))
10743 if (no_new_pseudos)
10745 op0 = force_reg (mode, op0);
10747 ix86_compare_op0 = op0;
10748 ix86_compare_op1 = op1;
10749 *pop = ix86_expand_compare (code, NULL, NULL);
10750 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10755 ix86_expand_int_movcc (rtx operands[])
10757 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10758 rtx compare_seq, compare_op;
10759 rtx second_test, bypass_test;
10760 enum machine_mode mode = GET_MODE (operands[0]);
10761 bool sign_bit_compare_p = false;;
10764 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10765 compare_seq = get_insns ();
10768 compare_code = GET_CODE (compare_op);
10770 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10771 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10772 sign_bit_compare_p = true;
10774 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10775 HImode insns, we'd be swallowed in word prefix ops. */
10777 if ((mode != HImode || TARGET_FAST_PREFIX)
10778 && (mode != (TARGET_64BIT ? TImode : DImode))
10779 && GET_CODE (operands[2]) == CONST_INT
10780 && GET_CODE (operands[3]) == CONST_INT)
10782 rtx out = operands[0];
10783 HOST_WIDE_INT ct = INTVAL (operands[2]);
10784 HOST_WIDE_INT cf = INTVAL (operands[3]);
10785 HOST_WIDE_INT diff;
10788 /* Sign bit compares are better done using shifts than we do by using
10790 if (sign_bit_compare_p
10791 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10792 ix86_compare_op1, &compare_op))
10794 /* Detect overlap between destination and compare sources. */
10797 if (!sign_bit_compare_p)
10799 bool fpcmp = false;
10801 compare_code = GET_CODE (compare_op);
10803 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10804 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10807 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10810 /* To simplify rest of code, restrict to the GEU case. */
10811 if (compare_code == LTU)
10813 HOST_WIDE_INT tmp = ct;
10816 compare_code = reverse_condition (compare_code);
10817 code = reverse_condition (code);
10822 PUT_CODE (compare_op,
10823 reverse_condition_maybe_unordered
10824 (GET_CODE (compare_op)));
10826 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10830 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10831 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10832 tmp = gen_reg_rtx (mode);
10834 if (mode == DImode)
10835 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10837 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10841 if (code == GT || code == GE)
10842 code = reverse_condition (code);
10845 HOST_WIDE_INT tmp = ct;
10850 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10851 ix86_compare_op1, VOIDmode, 0, -1);
10864 tmp = expand_simple_binop (mode, PLUS,
10866 copy_rtx (tmp), 1, OPTAB_DIRECT);
10877 tmp = expand_simple_binop (mode, IOR,
10879 copy_rtx (tmp), 1, OPTAB_DIRECT);
10881 else if (diff == -1 && ct)
10891 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10893 tmp = expand_simple_binop (mode, PLUS,
10894 copy_rtx (tmp), GEN_INT (cf),
10895 copy_rtx (tmp), 1, OPTAB_DIRECT);
10903 * andl cf - ct, dest
10913 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10916 tmp = expand_simple_binop (mode, AND,
10918 gen_int_mode (cf - ct, mode),
10919 copy_rtx (tmp), 1, OPTAB_DIRECT);
10921 tmp = expand_simple_binop (mode, PLUS,
10922 copy_rtx (tmp), GEN_INT (ct),
10923 copy_rtx (tmp), 1, OPTAB_DIRECT);
10926 if (!rtx_equal_p (tmp, out))
10927 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10929 return 1; /* DONE */
10935 tmp = ct, ct = cf, cf = tmp;
10937 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10939 /* We may be reversing unordered compare to normal compare, that
10940 is not valid in general (we may convert non-trapping condition
10941 to trapping one), however on i386 we currently emit all
10942 comparisons unordered. */
10943 compare_code = reverse_condition_maybe_unordered (compare_code);
10944 code = reverse_condition_maybe_unordered (code);
10948 compare_code = reverse_condition (compare_code);
10949 code = reverse_condition (code);
10953 compare_code = UNKNOWN;
10954 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10955 && GET_CODE (ix86_compare_op1) == CONST_INT)
10957 if (ix86_compare_op1 == const0_rtx
10958 && (code == LT || code == GE))
10959 compare_code = code;
10960 else if (ix86_compare_op1 == constm1_rtx)
10964 else if (code == GT)
10969 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10970 if (compare_code != UNKNOWN
10971 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10972 && (cf == -1 || ct == -1))
10974 /* If lea code below could be used, only optimize
10975 if it results in a 2 insn sequence. */
10977 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10978 || diff == 3 || diff == 5 || diff == 9)
10979 || (compare_code == LT && ct == -1)
10980 || (compare_code == GE && cf == -1))
10983 * notl op1 (if necessary)
10991 code = reverse_condition (code);
10994 out = emit_store_flag (out, code, ix86_compare_op0,
10995 ix86_compare_op1, VOIDmode, 0, -1);
10997 out = expand_simple_binop (mode, IOR,
10999 out, 1, OPTAB_DIRECT);
11000 if (out != operands[0])
11001 emit_move_insn (operands[0], out);
11003 return 1; /* DONE */
11008 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11009 || diff == 3 || diff == 5 || diff == 9)
11010 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11012 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11018 * lea cf(dest*(ct-cf)),dest
11022 * This also catches the degenerate setcc-only case.
11028 out = emit_store_flag (out, code, ix86_compare_op0,
11029 ix86_compare_op1, VOIDmode, 0, 1);
11032 /* On x86_64 the lea instruction operates on Pmode, so we need
11033 to get arithmetics done in proper mode to match. */
11035 tmp = copy_rtx (out);
11039 out1 = copy_rtx (out);
11040 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11044 tmp = gen_rtx_PLUS (mode, tmp, out1);
11050 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11053 if (!rtx_equal_p (tmp, out))
11056 out = force_operand (tmp, copy_rtx (out));
11058 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11060 if (!rtx_equal_p (out, operands[0]))
11061 emit_move_insn (operands[0], copy_rtx (out));
11063 return 1; /* DONE */
11067 * General case: Jumpful:
11068 * xorl dest,dest cmpl op1, op2
11069 * cmpl op1, op2 movl ct, dest
11070 * setcc dest jcc 1f
11071 * decl dest movl cf, dest
11072 * andl (cf-ct),dest 1:
11075 * Size 20. Size 14.
11077 * This is reasonably steep, but branch mispredict costs are
11078 * high on modern cpus, so consider failing only if optimizing
11082 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11083 && BRANCH_COST >= 2)
11089 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11090 /* We may be reversing unordered compare to normal compare,
11091 that is not valid in general (we may convert non-trapping
11092 condition to trapping one), however on i386 we currently
11093 emit all comparisons unordered. */
11094 code = reverse_condition_maybe_unordered (code);
11097 code = reverse_condition (code);
11098 if (compare_code != UNKNOWN)
11099 compare_code = reverse_condition (compare_code);
11103 if (compare_code != UNKNOWN)
11105 /* notl op1 (if needed)
11110 For x < 0 (resp. x <= -1) there will be no notl,
11111 so if possible swap the constants to get rid of the
11113 True/false will be -1/0 while code below (store flag
11114 followed by decrement) is 0/-1, so the constants need
11115 to be exchanged once more. */
11117 if (compare_code == GE || !cf)
11119 code = reverse_condition (code);
11124 HOST_WIDE_INT tmp = cf;
11129 out = emit_store_flag (out, code, ix86_compare_op0,
11130 ix86_compare_op1, VOIDmode, 0, -1);
11134 out = emit_store_flag (out, code, ix86_compare_op0,
11135 ix86_compare_op1, VOIDmode, 0, 1);
11137 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11138 copy_rtx (out), 1, OPTAB_DIRECT);
11141 out = expand_simple_binop (mode, AND, copy_rtx (out),
11142 gen_int_mode (cf - ct, mode),
11143 copy_rtx (out), 1, OPTAB_DIRECT);
11145 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11146 copy_rtx (out), 1, OPTAB_DIRECT);
11147 if (!rtx_equal_p (out, operands[0]))
11148 emit_move_insn (operands[0], copy_rtx (out));
11150 return 1; /* DONE */
11154 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11156 /* Try a few things more with specific constants and a variable. */
11159 rtx var, orig_out, out, tmp;
11161 if (BRANCH_COST <= 2)
11162 return 0; /* FAIL */
11164 /* If one of the two operands is an interesting constant, load a
11165 constant with the above and mask it in with a logical operation. */
11167 if (GET_CODE (operands[2]) == CONST_INT)
11170 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11171 operands[3] = constm1_rtx, op = and_optab;
11172 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11173 operands[3] = const0_rtx, op = ior_optab;
11175 return 0; /* FAIL */
11177 else if (GET_CODE (operands[3]) == CONST_INT)
11180 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11181 operands[2] = constm1_rtx, op = and_optab;
11182 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11183 operands[2] = const0_rtx, op = ior_optab;
11185 return 0; /* FAIL */
11188 return 0; /* FAIL */
11190 orig_out = operands[0];
11191 tmp = gen_reg_rtx (mode);
11194 /* Recurse to get the constant loaded. */
11195 if (ix86_expand_int_movcc (operands) == 0)
11196 return 0; /* FAIL */
11198 /* Mask in the interesting variable. */
11199 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11201 if (!rtx_equal_p (out, orig_out))
11202 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11204 return 1; /* DONE */
11208 * For comparison with above,
11218 if (! nonimmediate_operand (operands[2], mode))
11219 operands[2] = force_reg (mode, operands[2]);
11220 if (! nonimmediate_operand (operands[3], mode))
11221 operands[3] = force_reg (mode, operands[3]);
11223 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11225 rtx tmp = gen_reg_rtx (mode);
11226 emit_move_insn (tmp, operands[3]);
11229 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11231 rtx tmp = gen_reg_rtx (mode);
11232 emit_move_insn (tmp, operands[2]);
11236 if (! register_operand (operands[2], VOIDmode)
11238 || ! register_operand (operands[3], VOIDmode)))
11239 operands[2] = force_reg (mode, operands[2]);
11242 && ! register_operand (operands[3], VOIDmode))
11243 operands[3] = force_reg (mode, operands[3]);
11245 emit_insn (compare_seq);
11246 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11247 gen_rtx_IF_THEN_ELSE (mode,
11248 compare_op, operands[2],
11251 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11252 gen_rtx_IF_THEN_ELSE (mode,
11254 copy_rtx (operands[3]),
11255 copy_rtx (operands[0]))));
11257 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11258 gen_rtx_IF_THEN_ELSE (mode,
11260 copy_rtx (operands[2]),
11261 copy_rtx (operands[0]))));
11263 return 1; /* DONE */
11266 /* Swap, force into registers, or otherwise massage the two operands
11267 to an sse comparison with a mask result. Thus we differ a bit from
11268 ix86_prepare_fp_compare_args which expects to produce a flags result.
11270 The DEST operand exists to help determine whether to commute commutative
11271 operators. The POP0/POP1 operands are updated in place. The new
11272 comparison code is returned, or UNKNOWN if not implementable. */
11274 static enum rtx_code
11275 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11276 rtx *pop0, rtx *pop1)
11284 /* We have no LTGT as an operator. We could implement it with
11285 NE & ORDERED, but this requires an extra temporary. It's
11286 not clear that it's worth it. */
11293 /* These are supported directly. */
11300 /* For commutative operators, try to canonicalize the destination
11301 operand to be first in the comparison - this helps reload to
11302 avoid extra moves. */
11303 if (!dest || !rtx_equal_p (dest, *pop1))
11311 /* These are not supported directly. Swap the comparison operands
11312 to transform into something that is supported. */
11316 code = swap_condition (code);
11320 gcc_unreachable ();
11326 /* Detect conditional moves that exactly match min/max operational
11327 semantics. Note that this is IEEE safe, as long as we don't
11328 interchange the operands.
11330 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11331 and TRUE if the operation is successful and instructions are emitted. */
11334 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11335 rtx cmp_op1, rtx if_true, rtx if_false)
11337 enum machine_mode mode;
11343 else if (code == UNGE)
11346 if_true = if_false;
11352 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11354 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11359 mode = GET_MODE (dest);
11361 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11362 but MODE may be a vector mode and thus not appropriate. */
11363 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11365 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11368 if_true = force_reg (mode, if_true);
11369 v = gen_rtvec (2, if_true, if_false);
11370 tmp = gen_rtx_UNSPEC (mode, v, u);
11374 code = is_min ? SMIN : SMAX;
11375 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11378 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11382 /* Expand an sse vector comparison. Return the register with the result. */
11385 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11386 rtx op_true, rtx op_false)
11388 enum machine_mode mode = GET_MODE (dest);
11391 cmp_op0 = force_reg (mode, cmp_op0);
11392 if (!nonimmediate_operand (cmp_op1, mode))
11393 cmp_op1 = force_reg (mode, cmp_op1);
11396 || reg_overlap_mentioned_p (dest, op_true)
11397 || reg_overlap_mentioned_p (dest, op_false))
11398 dest = gen_reg_rtx (mode);
11400 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11401 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11406 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11407 operations. This is used for both scalar and vector conditional moves. */
11410 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11412 enum machine_mode mode = GET_MODE (dest);
11415 if (op_false == CONST0_RTX (mode))
11417 op_true = force_reg (mode, op_true);
11418 x = gen_rtx_AND (mode, cmp, op_true);
11419 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11421 else if (op_true == CONST0_RTX (mode))
11423 op_false = force_reg (mode, op_false);
11424 x = gen_rtx_NOT (mode, cmp);
11425 x = gen_rtx_AND (mode, x, op_false);
11426 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11430 op_true = force_reg (mode, op_true);
11431 op_false = force_reg (mode, op_false);
11433 t2 = gen_reg_rtx (mode);
11435 t3 = gen_reg_rtx (mode);
11439 x = gen_rtx_AND (mode, op_true, cmp);
11440 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11442 x = gen_rtx_NOT (mode, cmp);
11443 x = gen_rtx_AND (mode, x, op_false);
11444 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11446 x = gen_rtx_IOR (mode, t3, t2);
11447 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11451 /* Expand a floating-point conditional move. Return true if successful. */
11454 ix86_expand_fp_movcc (rtx operands[])
11456 enum machine_mode mode = GET_MODE (operands[0]);
11457 enum rtx_code code = GET_CODE (operands[1]);
11458 rtx tmp, compare_op, second_test, bypass_test;
11460 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11462 enum machine_mode cmode;
11464 /* Since we've no cmove for sse registers, don't force bad register
11465 allocation just to gain access to it. Deny movcc when the
11466 comparison mode doesn't match the move mode. */
11467 cmode = GET_MODE (ix86_compare_op0);
11468 if (cmode == VOIDmode)
11469 cmode = GET_MODE (ix86_compare_op1);
11473 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11475 &ix86_compare_op1);
11476 if (code == UNKNOWN)
11479 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11480 ix86_compare_op1, operands[2],
11484 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11485 ix86_compare_op1, operands[2], operands[3]);
11486 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11490 /* The floating point conditional move instructions don't directly
11491 support conditions resulting from a signed integer comparison. */
11493 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11495 /* The floating point conditional move instructions don't directly
11496 support signed integer comparisons. */
11498 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11500 gcc_assert (!second_test && !bypass_test);
11501 tmp = gen_reg_rtx (QImode);
11502 ix86_expand_setcc (code, tmp);
11504 ix86_compare_op0 = tmp;
11505 ix86_compare_op1 = const0_rtx;
11506 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11508 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11510 tmp = gen_reg_rtx (mode);
11511 emit_move_insn (tmp, operands[3]);
11514 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11516 tmp = gen_reg_rtx (mode);
11517 emit_move_insn (tmp, operands[2]);
11521 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11522 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11523 operands[2], operands[3])));
11525 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11526 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11527 operands[3], operands[0])));
11529 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11530 gen_rtx_IF_THEN_ELSE (mode, second_test,
11531 operands[2], operands[0])));
11536 /* Expand a floating-point vector conditional move; a vcond operation
11537 rather than a movcc operation. */
11540 ix86_expand_fp_vcond (rtx operands[])
11542 enum rtx_code code = GET_CODE (operands[3]);
11545 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11546 &operands[4], &operands[5]);
11547 if (code == UNKNOWN)
11550 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11551 operands[5], operands[1], operands[2]))
11554 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11555 operands[1], operands[2]);
11556 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11560 /* Expand a signed integral vector conditional move. */
11563 ix86_expand_int_vcond (rtx operands[])
11565 enum machine_mode mode = GET_MODE (operands[0]);
11566 enum rtx_code code = GET_CODE (operands[3]);
11567 bool negate = false;
11570 cop0 = operands[4];
11571 cop1 = operands[5];
11573 /* Canonicalize the comparison to EQ, GT, GTU. */
11584 code = reverse_condition (code);
11590 code = reverse_condition (code);
11596 code = swap_condition (code);
11597 x = cop0, cop0 = cop1, cop1 = x;
11601 gcc_unreachable ();
11604 /* Unsigned parallel compare is not supported by the hardware. Play some
11605 tricks to turn this into a signed comparison against 0. */
11608 cop0 = force_reg (mode, cop0);
11616 /* Perform a parallel modulo subtraction. */
11617 t1 = gen_reg_rtx (mode);
11618 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11620 /* Extract the original sign bit of op0. */
11621 mask = GEN_INT (-0x80000000);
11622 mask = gen_rtx_CONST_VECTOR (mode,
11623 gen_rtvec (4, mask, mask, mask, mask));
11624 mask = force_reg (mode, mask);
11625 t2 = gen_reg_rtx (mode);
11626 emit_insn (gen_andv4si3 (t2, cop0, mask));
11628 /* XOR it back into the result of the subtraction. This results
11629 in the sign bit set iff we saw unsigned underflow. */
11630 x = gen_reg_rtx (mode);
11631 emit_insn (gen_xorv4si3 (x, t1, t2));
11639 /* Perform a parallel unsigned saturating subtraction. */
11640 x = gen_reg_rtx (mode);
11641 emit_insn (gen_rtx_SET (VOIDmode, x,
11642 gen_rtx_US_MINUS (mode, cop0, cop1)));
11649 gcc_unreachable ();
11653 cop1 = CONST0_RTX (mode);
11656 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11657 operands[1+negate], operands[2-negate]);
11659 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11660 operands[2-negate]);
11664 /* Expand conditional increment or decrement using adb/sbb instructions.
11665 The default case using setcc followed by the conditional move can be
11666 done by generic code. */
11668 ix86_expand_int_addcc (rtx operands[])
11670 enum rtx_code code = GET_CODE (operands[1]);
11672 rtx val = const0_rtx;
11673 bool fpcmp = false;
11674 enum machine_mode mode = GET_MODE (operands[0]);
11676 if (operands[3] != const1_rtx
11677 && operands[3] != constm1_rtx)
11679 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11680 ix86_compare_op1, &compare_op))
11682 code = GET_CODE (compare_op);
11684 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11685 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11688 code = ix86_fp_compare_code_to_integer (code);
11695 PUT_CODE (compare_op,
11696 reverse_condition_maybe_unordered
11697 (GET_CODE (compare_op)));
11699 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11701 PUT_MODE (compare_op, mode);
11703 /* Construct either adc or sbb insn. */
11704 if ((code == LTU) == (operands[3] == constm1_rtx))
11706 switch (GET_MODE (operands[0]))
11709 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11712 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11715 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11718 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11721 gcc_unreachable ();
11726 switch (GET_MODE (operands[0]))
11729 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11732 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11735 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11738 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11741 gcc_unreachable ();
11744 return 1; /* DONE */
11748 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11749 works for floating pointer parameters and nonoffsetable memories.
11750 For pushes, it returns just stack offsets; the values will be saved
11751 in the right order. Maximally three parts are generated. */
11754 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11759 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11761 size = (GET_MODE_SIZE (mode) + 4) / 8;
11763 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11764 gcc_assert (size >= 2 && size <= 3);
11766 /* Optimize constant pool reference to immediates. This is used by fp
11767 moves, that force all constants to memory to allow combining. */
11768 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11770 rtx tmp = maybe_get_pool_constant (operand);
11775 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11777 /* The only non-offsetable memories we handle are pushes. */
11778 int ok = push_operand (operand, VOIDmode);
11782 operand = copy_rtx (operand);
11783 PUT_MODE (operand, Pmode);
11784 parts[0] = parts[1] = parts[2] = operand;
11788 if (GET_CODE (operand) == CONST_VECTOR)
11790 enum machine_mode imode = int_mode_for_mode (mode);
11791 /* Caution: if we looked through a constant pool memory above,
11792 the operand may actually have a different mode now. That's
11793 ok, since we want to pun this all the way back to an integer. */
11794 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11795 gcc_assert (operand != NULL);
11801 if (mode == DImode)
11802 split_di (&operand, 1, &parts[0], &parts[1]);
11805 if (REG_P (operand))
11807 gcc_assert (reload_completed);
11808 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11809 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11811 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11813 else if (offsettable_memref_p (operand))
11815 operand = adjust_address (operand, SImode, 0);
11816 parts[0] = operand;
11817 parts[1] = adjust_address (operand, SImode, 4);
11819 parts[2] = adjust_address (operand, SImode, 8);
11821 else if (GET_CODE (operand) == CONST_DOUBLE)
11826 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11830 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11831 parts[2] = gen_int_mode (l[2], SImode);
11834 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11837 gcc_unreachable ();
11839 parts[1] = gen_int_mode (l[1], SImode);
11840 parts[0] = gen_int_mode (l[0], SImode);
11843 gcc_unreachable ();
11848 if (mode == TImode)
11849 split_ti (&operand, 1, &parts[0], &parts[1]);
11850 if (mode == XFmode || mode == TFmode)
11852 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11853 if (REG_P (operand))
11855 gcc_assert (reload_completed);
11856 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11857 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11859 else if (offsettable_memref_p (operand))
11861 operand = adjust_address (operand, DImode, 0);
11862 parts[0] = operand;
11863 parts[1] = adjust_address (operand, upper_mode, 8);
11865 else if (GET_CODE (operand) == CONST_DOUBLE)
11870 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11871 real_to_target (l, &r, mode);
11873 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11874 if (HOST_BITS_PER_WIDE_INT >= 64)
11877 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11878 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11881 parts[0] = immed_double_const (l[0], l[1], DImode);
11883 if (upper_mode == SImode)
11884 parts[1] = gen_int_mode (l[2], SImode);
11885 else if (HOST_BITS_PER_WIDE_INT >= 64)
11888 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11889 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11892 parts[1] = immed_double_const (l[2], l[3], DImode);
11895 gcc_unreachable ();
11902 /* Emit insns to perform a move or push of DI, DF, and XF values.
11903 Return false when normal moves are needed; true when all required
11904 insns have been emitted. Operands 2-4 contain the input values
11905 int the correct order; operands 5-7 contain the output values. */
11908 ix86_split_long_move (rtx operands[])
11913 int collisions = 0;
11914 enum machine_mode mode = GET_MODE (operands[0]);
11916 /* The DFmode expanders may ask us to move double.
11917 For 64bit target this is single move. By hiding the fact
11918 here we simplify i386.md splitters. */
11919 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11921 /* Optimize constant pool reference to immediates. This is used by
11922 fp moves, that force all constants to memory to allow combining. */
11924 if (GET_CODE (operands[1]) == MEM
11925 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11926 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11927 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11928 if (push_operand (operands[0], VOIDmode))
11930 operands[0] = copy_rtx (operands[0]);
11931 PUT_MODE (operands[0], Pmode);
11934 operands[0] = gen_lowpart (DImode, operands[0]);
11935 operands[1] = gen_lowpart (DImode, operands[1]);
11936 emit_move_insn (operands[0], operands[1]);
11940 /* The only non-offsettable memory we handle is push. */
11941 if (push_operand (operands[0], VOIDmode))
11944 gcc_assert (GET_CODE (operands[0]) != MEM
11945 || offsettable_memref_p (operands[0]));
11947 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11948 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11950 /* When emitting push, take care for source operands on the stack. */
11951 if (push && GET_CODE (operands[1]) == MEM
11952 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11955 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11956 XEXP (part[1][2], 0));
11957 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11958 XEXP (part[1][1], 0));
11961 /* We need to do copy in the right order in case an address register
11962 of the source overlaps the destination. */
11963 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
11965 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
11967 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11970 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
11973 /* Collision in the middle part can be handled by reordering. */
11974 if (collisions == 1 && nparts == 3
11975 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11978 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
11979 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
11982 /* If there are more collisions, we can't handle it by reordering.
11983 Do an lea to the last part and use only one colliding move. */
11984 else if (collisions > 1)
11990 base = part[0][nparts - 1];
11992 /* Handle the case when the last part isn't valid for lea.
11993 Happens in 64-bit mode storing the 12-byte XFmode. */
11994 if (GET_MODE (base) != Pmode)
11995 base = gen_rtx_REG (Pmode, REGNO (base));
11997 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
11998 part[1][0] = replace_equiv_address (part[1][0], base);
11999 part[1][1] = replace_equiv_address (part[1][1],
12000 plus_constant (base, UNITS_PER_WORD));
12002 part[1][2] = replace_equiv_address (part[1][2],
12003 plus_constant (base, 8));
12013 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12014 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12015 emit_move_insn (part[0][2], part[1][2]);
12020 /* In 64bit mode we don't have 32bit push available. In case this is
12021 register, it is OK - we will just use larger counterpart. We also
12022 retype memory - these comes from attempt to avoid REX prefix on
12023 moving of second half of TFmode value. */
12024 if (GET_MODE (part[1][1]) == SImode)
12026 switch (GET_CODE (part[1][1]))
12029 part[1][1] = adjust_address (part[1][1], DImode, 0);
12033 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12037 gcc_unreachable ();
12040 if (GET_MODE (part[1][0]) == SImode)
12041 part[1][0] = part[1][1];
12044 emit_move_insn (part[0][1], part[1][1]);
12045 emit_move_insn (part[0][0], part[1][0]);
12049 /* Choose correct order to not overwrite the source before it is copied. */
12050 if ((REG_P (part[0][0])
12051 && REG_P (part[1][1])
12052 && (REGNO (part[0][0]) == REGNO (part[1][1])
12054 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12056 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12060 operands[2] = part[0][2];
12061 operands[3] = part[0][1];
12062 operands[4] = part[0][0];
12063 operands[5] = part[1][2];
12064 operands[6] = part[1][1];
12065 operands[7] = part[1][0];
12069 operands[2] = part[0][1];
12070 operands[3] = part[0][0];
12071 operands[5] = part[1][1];
12072 operands[6] = part[1][0];
12079 operands[2] = part[0][0];
12080 operands[3] = part[0][1];
12081 operands[4] = part[0][2];
12082 operands[5] = part[1][0];
12083 operands[6] = part[1][1];
12084 operands[7] = part[1][2];
12088 operands[2] = part[0][0];
12089 operands[3] = part[0][1];
12090 operands[5] = part[1][0];
12091 operands[6] = part[1][1];
12095 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12098 if (GET_CODE (operands[5]) == CONST_INT
12099 && operands[5] != const0_rtx
12100 && REG_P (operands[2]))
12102 if (GET_CODE (operands[6]) == CONST_INT
12103 && INTVAL (operands[6]) == INTVAL (operands[5]))
12104 operands[6] = operands[2];
12107 && GET_CODE (operands[7]) == CONST_INT
12108 && INTVAL (operands[7]) == INTVAL (operands[5]))
12109 operands[7] = operands[2];
12113 && GET_CODE (operands[6]) == CONST_INT
12114 && operands[6] != const0_rtx
12115 && REG_P (operands[3])
12116 && GET_CODE (operands[7]) == CONST_INT
12117 && INTVAL (operands[7]) == INTVAL (operands[6]))
12118 operands[7] = operands[3];
12121 emit_move_insn (operands[2], operands[5]);
12122 emit_move_insn (operands[3], operands[6]);
12124 emit_move_insn (operands[4], operands[7]);
12129 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12130 left shift by a constant, either using a single shift or
12131 a sequence of add instructions. */
12134 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12138 emit_insn ((mode == DImode
12140 : gen_adddi3) (operand, operand, operand));
12142 else if (!optimize_size
12143 && count * ix86_cost->add <= ix86_cost->shift_const)
12146 for (i=0; i<count; i++)
12148 emit_insn ((mode == DImode
12150 : gen_adddi3) (operand, operand, operand));
12154 emit_insn ((mode == DImode
12156 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12160 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12162 rtx low[2], high[2];
12164 const int single_width = mode == DImode ? 32 : 64;
12166 if (GET_CODE (operands[2]) == CONST_INT)
12168 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12169 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12171 if (count >= single_width)
12173 emit_move_insn (high[0], low[1]);
12174 emit_move_insn (low[0], const0_rtx);
12176 if (count > single_width)
12177 ix86_expand_ashl_const (high[0], count - single_width, mode);
12181 if (!rtx_equal_p (operands[0], operands[1]))
12182 emit_move_insn (operands[0], operands[1]);
12183 emit_insn ((mode == DImode
12185 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12186 ix86_expand_ashl_const (low[0], count, mode);
12191 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12193 if (operands[1] == const1_rtx)
12195 /* Assuming we've chosen a QImode capable registers, then 1 << N
12196 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12197 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12199 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12201 ix86_expand_clear (low[0]);
12202 ix86_expand_clear (high[0]);
12203 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12205 d = gen_lowpart (QImode, low[0]);
12206 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12207 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12208 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12210 d = gen_lowpart (QImode, high[0]);
12211 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12212 s = gen_rtx_NE (QImode, flags, const0_rtx);
12213 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12216 /* Otherwise, we can get the same results by manually performing
12217 a bit extract operation on bit 5/6, and then performing the two
12218 shifts. The two methods of getting 0/1 into low/high are exactly
12219 the same size. Avoiding the shift in the bit extract case helps
12220 pentium4 a bit; no one else seems to care much either way. */
12225 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12226 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12228 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12229 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12231 emit_insn ((mode == DImode
12233 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12234 emit_insn ((mode == DImode
12236 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12237 emit_move_insn (low[0], high[0]);
12238 emit_insn ((mode == DImode
12240 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12243 emit_insn ((mode == DImode
12245 : gen_ashldi3) (low[0], low[0], operands[2]));
12246 emit_insn ((mode == DImode
12248 : gen_ashldi3) (high[0], high[0], operands[2]));
12252 if (operands[1] == constm1_rtx)
12254 /* For -1 << N, we can avoid the shld instruction, because we
12255 know that we're shifting 0...31/63 ones into a -1. */
12256 emit_move_insn (low[0], constm1_rtx);
12258 emit_move_insn (high[0], low[0]);
12260 emit_move_insn (high[0], constm1_rtx);
12264 if (!rtx_equal_p (operands[0], operands[1]))
12265 emit_move_insn (operands[0], operands[1]);
12267 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12268 emit_insn ((mode == DImode
12270 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12273 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12275 if (TARGET_CMOVE && scratch)
12277 ix86_expand_clear (scratch);
12278 emit_insn ((mode == DImode
12279 ? gen_x86_shift_adj_1
12280 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12283 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12287 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12289 rtx low[2], high[2];
12291 const int single_width = mode == DImode ? 32 : 64;
12293 if (GET_CODE (operands[2]) == CONST_INT)
12295 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12296 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12298 if (count == single_width * 2 - 1)
12300 emit_move_insn (high[0], high[1]);
12301 emit_insn ((mode == DImode
12303 : gen_ashrdi3) (high[0], high[0],
12304 GEN_INT (single_width - 1)));
12305 emit_move_insn (low[0], high[0]);
12308 else if (count >= single_width)
12310 emit_move_insn (low[0], high[1]);
12311 emit_move_insn (high[0], low[0]);
12312 emit_insn ((mode == DImode
12314 : gen_ashrdi3) (high[0], high[0],
12315 GEN_INT (single_width - 1)));
12316 if (count > single_width)
12317 emit_insn ((mode == DImode
12319 : gen_ashrdi3) (low[0], low[0],
12320 GEN_INT (count - single_width)));
12324 if (!rtx_equal_p (operands[0], operands[1]))
12325 emit_move_insn (operands[0], operands[1]);
12326 emit_insn ((mode == DImode
12328 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12329 emit_insn ((mode == DImode
12331 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12336 if (!rtx_equal_p (operands[0], operands[1]))
12337 emit_move_insn (operands[0], operands[1]);
12339 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12341 emit_insn ((mode == DImode
12343 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12344 emit_insn ((mode == DImode
12346 : gen_ashrdi3) (high[0], high[0], operands[2]));
12348 if (TARGET_CMOVE && scratch)
12350 emit_move_insn (scratch, high[0]);
12351 emit_insn ((mode == DImode
12353 : gen_ashrdi3) (scratch, scratch,
12354 GEN_INT (single_width - 1)));
12355 emit_insn ((mode == DImode
12356 ? gen_x86_shift_adj_1
12357 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12361 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12366 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12368 rtx low[2], high[2];
12370 const int single_width = mode == DImode ? 32 : 64;
12372 if (GET_CODE (operands[2]) == CONST_INT)
12374 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12375 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12377 if (count >= single_width)
12379 emit_move_insn (low[0], high[1]);
12380 ix86_expand_clear (high[0]);
12382 if (count > single_width)
12383 emit_insn ((mode == DImode
12385 : gen_lshrdi3) (low[0], low[0],
12386 GEN_INT (count - single_width)));
12390 if (!rtx_equal_p (operands[0], operands[1]))
12391 emit_move_insn (operands[0], operands[1]);
12392 emit_insn ((mode == DImode
12394 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12395 emit_insn ((mode == DImode
12397 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12402 if (!rtx_equal_p (operands[0], operands[1]))
12403 emit_move_insn (operands[0], operands[1]);
12405 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12407 emit_insn ((mode == DImode
12409 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12410 emit_insn ((mode == DImode
12412 : gen_lshrdi3) (high[0], high[0], operands[2]));
12414 /* Heh. By reversing the arguments, we can reuse this pattern. */
12415 if (TARGET_CMOVE && scratch)
12417 ix86_expand_clear (scratch);
12418 emit_insn ((mode == DImode
12419 ? gen_x86_shift_adj_1
12420 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12424 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12428 /* Helper function for the string operations below. Dest VARIABLE whether
12429 it is aligned to VALUE bytes. If true, jump to the label. */
12431 ix86_expand_aligntest (rtx variable, int value)
12433 rtx label = gen_label_rtx ();
12434 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12435 if (GET_MODE (variable) == DImode)
12436 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12438 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12439 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12444 /* Adjust COUNTER by the VALUE. */
12446 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12448 if (GET_MODE (countreg) == DImode)
12449 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12451 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12454 /* Zero extend possibly SImode EXP to Pmode register. */
12456 ix86_zero_extend_to_Pmode (rtx exp)
12459 if (GET_MODE (exp) == VOIDmode)
12460 return force_reg (Pmode, exp);
12461 if (GET_MODE (exp) == Pmode)
12462 return copy_to_mode_reg (Pmode, exp);
12463 r = gen_reg_rtx (Pmode);
12464 emit_insn (gen_zero_extendsidi2 (r, exp));
12468 /* Expand string move (memcpy) operation. Use i386 string operations when
12469 profitable. expand_clrmem contains similar code. */
12471 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12473 rtx srcreg, destreg, countreg, srcexp, destexp;
12474 enum machine_mode counter_mode;
12475 HOST_WIDE_INT align = 0;
12476 unsigned HOST_WIDE_INT count = 0;
12478 if (GET_CODE (align_exp) == CONST_INT)
12479 align = INTVAL (align_exp);
12481 /* Can't use any of this if the user has appropriated esi or edi. */
12482 if (global_regs[4] || global_regs[5])
12485 /* This simple hack avoids all inlining code and simplifies code below. */
12486 if (!TARGET_ALIGN_STRINGOPS)
12489 if (GET_CODE (count_exp) == CONST_INT)
12491 count = INTVAL (count_exp);
12492 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12496 /* Figure out proper mode for counter. For 32bits it is always SImode,
12497 for 64bits use SImode when possible, otherwise DImode.
12498 Set count to number of bytes copied when known at compile time. */
12500 || GET_MODE (count_exp) == SImode
12501 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12502 counter_mode = SImode;
12504 counter_mode = DImode;
12506 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12508 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12509 if (destreg != XEXP (dst, 0))
12510 dst = replace_equiv_address_nv (dst, destreg);
12511 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12512 if (srcreg != XEXP (src, 0))
12513 src = replace_equiv_address_nv (src, srcreg);
12515 /* When optimizing for size emit simple rep ; movsb instruction for
12516 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12517 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12518 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12519 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12520 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12521 known to be zero or not. The rep; movsb sequence causes higher
12522 register pressure though, so take that into account. */
12524 if ((!optimize || optimize_size)
12529 || (count & 3) + count / 4 > 6))))
12531 emit_insn (gen_cld ());
12532 countreg = ix86_zero_extend_to_Pmode (count_exp);
12533 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12534 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12535 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12539 /* For constant aligned (or small unaligned) copies use rep movsl
12540 followed by code copying the rest. For PentiumPro ensure 8 byte
12541 alignment to allow rep movsl acceleration. */
12543 else if (count != 0
12545 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12546 || optimize_size || count < (unsigned int) 64))
12548 unsigned HOST_WIDE_INT offset = 0;
12549 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12550 rtx srcmem, dstmem;
12552 emit_insn (gen_cld ());
12553 if (count & ~(size - 1))
12555 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12557 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12559 while (offset < (count & ~(size - 1)))
12561 srcmem = adjust_automodify_address_nv (src, movs_mode,
12563 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12565 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12571 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12572 & (TARGET_64BIT ? -1 : 0x3fffffff));
12573 countreg = copy_to_mode_reg (counter_mode, countreg);
12574 countreg = ix86_zero_extend_to_Pmode (countreg);
12576 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12577 GEN_INT (size == 4 ? 2 : 3));
12578 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12579 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12581 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12582 countreg, destexp, srcexp));
12583 offset = count & ~(size - 1);
12586 if (size == 8 && (count & 0x04))
12588 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12590 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12592 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12597 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12599 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12601 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12606 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12608 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12610 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12613 /* The generic code based on the glibc implementation:
12614 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12615 allowing accelerated copying there)
12616 - copy the data using rep movsl
12617 - copy the rest. */
12622 rtx srcmem, dstmem;
12623 int desired_alignment = (TARGET_PENTIUMPRO
12624 && (count == 0 || count >= (unsigned int) 260)
12625 ? 8 : UNITS_PER_WORD);
12626 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12627 dst = change_address (dst, BLKmode, destreg);
12628 src = change_address (src, BLKmode, srcreg);
12630 /* In case we don't know anything about the alignment, default to
12631 library version, since it is usually equally fast and result in
12634 Also emit call when we know that the count is large and call overhead
12635 will not be important. */
12636 if (!TARGET_INLINE_ALL_STRINGOPS
12637 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12640 if (TARGET_SINGLE_STRINGOP)
12641 emit_insn (gen_cld ());
12643 countreg2 = gen_reg_rtx (Pmode);
12644 countreg = copy_to_mode_reg (counter_mode, count_exp);
12646 /* We don't use loops to align destination and to copy parts smaller
12647 than 4 bytes, because gcc is able to optimize such code better (in
12648 the case the destination or the count really is aligned, gcc is often
12649 able to predict the branches) and also it is friendlier to the
12650 hardware branch prediction.
12652 Using loops is beneficial for generic case, because we can
12653 handle small counts using the loops. Many CPUs (such as Athlon)
12654 have large REP prefix setup costs.
12656 This is quite costly. Maybe we can revisit this decision later or
12657 add some customizability to this code. */
12659 if (count == 0 && align < desired_alignment)
12661 label = gen_label_rtx ();
12662 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12663 LEU, 0, counter_mode, 1, label);
12667 rtx label = ix86_expand_aligntest (destreg, 1);
12668 srcmem = change_address (src, QImode, srcreg);
12669 dstmem = change_address (dst, QImode, destreg);
12670 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12671 ix86_adjust_counter (countreg, 1);
12672 emit_label (label);
12673 LABEL_NUSES (label) = 1;
12677 rtx label = ix86_expand_aligntest (destreg, 2);
12678 srcmem = change_address (src, HImode, srcreg);
12679 dstmem = change_address (dst, HImode, destreg);
12680 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12681 ix86_adjust_counter (countreg, 2);
12682 emit_label (label);
12683 LABEL_NUSES (label) = 1;
12685 if (align <= 4 && desired_alignment > 4)
12687 rtx label = ix86_expand_aligntest (destreg, 4);
12688 srcmem = change_address (src, SImode, srcreg);
12689 dstmem = change_address (dst, SImode, destreg);
12690 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12691 ix86_adjust_counter (countreg, 4);
12692 emit_label (label);
12693 LABEL_NUSES (label) = 1;
12696 if (label && desired_alignment > 4 && !TARGET_64BIT)
12698 emit_label (label);
12699 LABEL_NUSES (label) = 1;
12702 if (!TARGET_SINGLE_STRINGOP)
12703 emit_insn (gen_cld ());
12706 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12708 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12712 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12713 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12715 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12716 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12717 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12718 countreg2, destexp, srcexp));
12722 emit_label (label);
12723 LABEL_NUSES (label) = 1;
12725 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12727 srcmem = change_address (src, SImode, srcreg);
12728 dstmem = change_address (dst, SImode, destreg);
12729 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12731 if ((align <= 4 || count == 0) && TARGET_64BIT)
12733 rtx label = ix86_expand_aligntest (countreg, 4);
12734 srcmem = change_address (src, SImode, srcreg);
12735 dstmem = change_address (dst, SImode, destreg);
12736 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12737 emit_label (label);
12738 LABEL_NUSES (label) = 1;
12740 if (align > 2 && count != 0 && (count & 2))
12742 srcmem = change_address (src, HImode, srcreg);
12743 dstmem = change_address (dst, HImode, destreg);
12744 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12746 if (align <= 2 || count == 0)
12748 rtx label = ix86_expand_aligntest (countreg, 2);
12749 srcmem = change_address (src, HImode, srcreg);
12750 dstmem = change_address (dst, HImode, destreg);
12751 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12752 emit_label (label);
12753 LABEL_NUSES (label) = 1;
12755 if (align > 1 && count != 0 && (count & 1))
12757 srcmem = change_address (src, QImode, srcreg);
12758 dstmem = change_address (dst, QImode, destreg);
12759 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12761 if (align <= 1 || count == 0)
12763 rtx label = ix86_expand_aligntest (countreg, 1);
12764 srcmem = change_address (src, QImode, srcreg);
12765 dstmem = change_address (dst, QImode, destreg);
12766 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12767 emit_label (label);
12768 LABEL_NUSES (label) = 1;
12775 /* Expand string clear operation (bzero). Use i386 string operations when
12776 profitable. expand_movmem contains similar code. */
12778 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12780 rtx destreg, zeroreg, countreg, destexp;
12781 enum machine_mode counter_mode;
12782 HOST_WIDE_INT align = 0;
12783 unsigned HOST_WIDE_INT count = 0;
12785 if (GET_CODE (align_exp) == CONST_INT)
12786 align = INTVAL (align_exp);
12788 /* Can't use any of this if the user has appropriated esi. */
12789 if (global_regs[4])
12792 /* This simple hack avoids all inlining code and simplifies code below. */
12793 if (!TARGET_ALIGN_STRINGOPS)
12796 if (GET_CODE (count_exp) == CONST_INT)
12798 count = INTVAL (count_exp);
12799 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12802 /* Figure out proper mode for counter. For 32bits it is always SImode,
12803 for 64bits use SImode when possible, otherwise DImode.
12804 Set count to number of bytes copied when known at compile time. */
12806 || GET_MODE (count_exp) == SImode
12807 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12808 counter_mode = SImode;
12810 counter_mode = DImode;
12812 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12813 if (destreg != XEXP (dst, 0))
12814 dst = replace_equiv_address_nv (dst, destreg);
12817 /* When optimizing for size emit simple rep ; movsb instruction for
12818 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12819 sequence is 7 bytes long, so if optimizing for size and count is
12820 small enough that some stosl, stosw and stosb instructions without
12821 rep are shorter, fall back into the next if. */
12823 if ((!optimize || optimize_size)
12826 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12828 emit_insn (gen_cld ());
12830 countreg = ix86_zero_extend_to_Pmode (count_exp);
12831 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12832 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12833 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12835 else if (count != 0
12837 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12838 || optimize_size || count < (unsigned int) 64))
12840 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12841 unsigned HOST_WIDE_INT offset = 0;
12843 emit_insn (gen_cld ());
12845 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12846 if (count & ~(size - 1))
12848 unsigned HOST_WIDE_INT repcount;
12849 unsigned int max_nonrep;
12851 repcount = count >> (size == 4 ? 2 : 3);
12853 repcount &= 0x3fffffff;
12855 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12856 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12857 bytes. In both cases the latter seems to be faster for small
12859 max_nonrep = size == 4 ? 7 : 4;
12860 if (!optimize_size)
12863 case PROCESSOR_PENTIUM4:
12864 case PROCESSOR_NOCONA:
12871 if (repcount <= max_nonrep)
12872 while (repcount-- > 0)
12874 rtx mem = adjust_automodify_address_nv (dst,
12875 GET_MODE (zeroreg),
12877 emit_insn (gen_strset (destreg, mem, zeroreg));
12882 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12883 countreg = ix86_zero_extend_to_Pmode (countreg);
12884 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12885 GEN_INT (size == 4 ? 2 : 3));
12886 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12887 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12889 offset = count & ~(size - 1);
12892 if (size == 8 && (count & 0x04))
12894 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12896 emit_insn (gen_strset (destreg, mem,
12897 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12902 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12904 emit_insn (gen_strset (destreg, mem,
12905 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12910 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12912 emit_insn (gen_strset (destreg, mem,
12913 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12920 /* Compute desired alignment of the string operation. */
12921 int desired_alignment = (TARGET_PENTIUMPRO
12922 && (count == 0 || count >= (unsigned int) 260)
12923 ? 8 : UNITS_PER_WORD);
12925 /* In case we don't know anything about the alignment, default to
12926 library version, since it is usually equally fast and result in
12929 Also emit call when we know that the count is large and call overhead
12930 will not be important. */
12931 if (!TARGET_INLINE_ALL_STRINGOPS
12932 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12935 if (TARGET_SINGLE_STRINGOP)
12936 emit_insn (gen_cld ());
12938 countreg2 = gen_reg_rtx (Pmode);
12939 countreg = copy_to_mode_reg (counter_mode, count_exp);
12940 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12941 /* Get rid of MEM_OFFSET, it won't be accurate. */
12942 dst = change_address (dst, BLKmode, destreg);
12944 if (count == 0 && align < desired_alignment)
12946 label = gen_label_rtx ();
12947 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12948 LEU, 0, counter_mode, 1, label);
12952 rtx label = ix86_expand_aligntest (destreg, 1);
12953 emit_insn (gen_strset (destreg, dst,
12954 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12955 ix86_adjust_counter (countreg, 1);
12956 emit_label (label);
12957 LABEL_NUSES (label) = 1;
12961 rtx label = ix86_expand_aligntest (destreg, 2);
12962 emit_insn (gen_strset (destreg, dst,
12963 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12964 ix86_adjust_counter (countreg, 2);
12965 emit_label (label);
12966 LABEL_NUSES (label) = 1;
12968 if (align <= 4 && desired_alignment > 4)
12970 rtx label = ix86_expand_aligntest (destreg, 4);
12971 emit_insn (gen_strset (destreg, dst,
12973 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
12975 ix86_adjust_counter (countreg, 4);
12976 emit_label (label);
12977 LABEL_NUSES (label) = 1;
12980 if (label && desired_alignment > 4 && !TARGET_64BIT)
12982 emit_label (label);
12983 LABEL_NUSES (label) = 1;
12987 if (!TARGET_SINGLE_STRINGOP)
12988 emit_insn (gen_cld ());
12991 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12993 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12997 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12998 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13000 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13001 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13005 emit_label (label);
13006 LABEL_NUSES (label) = 1;
13009 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13010 emit_insn (gen_strset (destreg, dst,
13011 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13012 if (TARGET_64BIT && (align <= 4 || count == 0))
13014 rtx label = ix86_expand_aligntest (countreg, 4);
13015 emit_insn (gen_strset (destreg, dst,
13016 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13017 emit_label (label);
13018 LABEL_NUSES (label) = 1;
13020 if (align > 2 && count != 0 && (count & 2))
13021 emit_insn (gen_strset (destreg, dst,
13022 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13023 if (align <= 2 || count == 0)
13025 rtx label = ix86_expand_aligntest (countreg, 2);
13026 emit_insn (gen_strset (destreg, dst,
13027 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13028 emit_label (label);
13029 LABEL_NUSES (label) = 1;
13031 if (align > 1 && count != 0 && (count & 1))
13032 emit_insn (gen_strset (destreg, dst,
13033 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13034 if (align <= 1 || count == 0)
13036 rtx label = ix86_expand_aligntest (countreg, 1);
13037 emit_insn (gen_strset (destreg, dst,
13038 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13039 emit_label (label);
13040 LABEL_NUSES (label) = 1;
13046 /* Expand strlen. */
13048 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13050 rtx addr, scratch1, scratch2, scratch3, scratch4;
13052 /* The generic case of strlen expander is long. Avoid it's
13053 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13055 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13056 && !TARGET_INLINE_ALL_STRINGOPS
13058 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13061 addr = force_reg (Pmode, XEXP (src, 0));
13062 scratch1 = gen_reg_rtx (Pmode);
13064 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13067 /* Well it seems that some optimizer does not combine a call like
13068 foo(strlen(bar), strlen(bar));
13069 when the move and the subtraction is done here. It does calculate
13070 the length just once when these instructions are done inside of
13071 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13072 often used and I use one fewer register for the lifetime of
13073 output_strlen_unroll() this is better. */
13075 emit_move_insn (out, addr);
13077 ix86_expand_strlensi_unroll_1 (out, src, align);
13079 /* strlensi_unroll_1 returns the address of the zero at the end of
13080 the string, like memchr(), so compute the length by subtracting
13081 the start address. */
13083 emit_insn (gen_subdi3 (out, out, addr));
13085 emit_insn (gen_subsi3 (out, out, addr));
13090 scratch2 = gen_reg_rtx (Pmode);
13091 scratch3 = gen_reg_rtx (Pmode);
13092 scratch4 = force_reg (Pmode, constm1_rtx);
13094 emit_move_insn (scratch3, addr);
13095 eoschar = force_reg (QImode, eoschar);
13097 emit_insn (gen_cld ());
13098 src = replace_equiv_address_nv (src, scratch3);
13100 /* If .md starts supporting :P, this can be done in .md. */
13101 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13102 scratch4), UNSPEC_SCAS);
13103 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13106 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13107 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13111 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13112 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13118 /* Expand the appropriate insns for doing strlen if not just doing
13121 out = result, initialized with the start address
13122 align_rtx = alignment of the address.
13123 scratch = scratch register, initialized with the startaddress when
13124 not aligned, otherwise undefined
13126 This is just the body. It needs the initializations mentioned above and
13127 some address computing at the end. These things are done in i386.md. */
13130 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13134 rtx align_2_label = NULL_RTX;
13135 rtx align_3_label = NULL_RTX;
13136 rtx align_4_label = gen_label_rtx ();
13137 rtx end_0_label = gen_label_rtx ();
13139 rtx tmpreg = gen_reg_rtx (SImode);
13140 rtx scratch = gen_reg_rtx (SImode);
13144 if (GET_CODE (align_rtx) == CONST_INT)
13145 align = INTVAL (align_rtx);
13147 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13149 /* Is there a known alignment and is it less than 4? */
13152 rtx scratch1 = gen_reg_rtx (Pmode);
13153 emit_move_insn (scratch1, out);
13154 /* Is there a known alignment and is it not 2? */
13157 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13158 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13160 /* Leave just the 3 lower bits. */
13161 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13162 NULL_RTX, 0, OPTAB_WIDEN);
13164 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13165 Pmode, 1, align_4_label);
13166 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13167 Pmode, 1, align_2_label);
13168 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13169 Pmode, 1, align_3_label);
13173 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13174 check if is aligned to 4 - byte. */
13176 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13177 NULL_RTX, 0, OPTAB_WIDEN);
13179 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13180 Pmode, 1, align_4_label);
13183 mem = change_address (src, QImode, out);
13185 /* Now compare the bytes. */
13187 /* Compare the first n unaligned byte on a byte per byte basis. */
13188 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13189 QImode, 1, end_0_label);
13191 /* Increment the address. */
13193 emit_insn (gen_adddi3 (out, out, const1_rtx));
13195 emit_insn (gen_addsi3 (out, out, const1_rtx));
13197 /* Not needed with an alignment of 2 */
13200 emit_label (align_2_label);
13202 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13206 emit_insn (gen_adddi3 (out, out, const1_rtx));
13208 emit_insn (gen_addsi3 (out, out, const1_rtx));
13210 emit_label (align_3_label);
13213 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13217 emit_insn (gen_adddi3 (out, out, const1_rtx));
13219 emit_insn (gen_addsi3 (out, out, const1_rtx));
13222 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13223 align this loop. It gives only huge programs, but does not help to
13225 emit_label (align_4_label);
13227 mem = change_address (src, SImode, out);
13228 emit_move_insn (scratch, mem);
13230 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13232 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13234 /* This formula yields a nonzero result iff one of the bytes is zero.
13235 This saves three branches inside loop and many cycles. */
13237 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13238 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13239 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13240 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13241 gen_int_mode (0x80808080, SImode)));
13242 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13247 rtx reg = gen_reg_rtx (SImode);
13248 rtx reg2 = gen_reg_rtx (Pmode);
13249 emit_move_insn (reg, tmpreg);
13250 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13252 /* If zero is not in the first two bytes, move two bytes forward. */
13253 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13254 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13255 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13256 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13257 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13260 /* Emit lea manually to avoid clobbering of flags. */
13261 emit_insn (gen_rtx_SET (SImode, reg2,
13262 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13264 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13265 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13266 emit_insn (gen_rtx_SET (VOIDmode, out,
13267 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13274 rtx end_2_label = gen_label_rtx ();
13275 /* Is zero in the first two bytes? */
13277 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13278 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13279 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13280 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13281 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13283 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13284 JUMP_LABEL (tmp) = end_2_label;
13286 /* Not in the first two. Move two bytes forward. */
13287 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13289 emit_insn (gen_adddi3 (out, out, const2_rtx));
13291 emit_insn (gen_addsi3 (out, out, const2_rtx));
13293 emit_label (end_2_label);
13297 /* Avoid branch in fixing the byte. */
13298 tmpreg = gen_lowpart (QImode, tmpreg);
13299 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13300 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13302 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13304 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13306 emit_label (end_0_label);
13310 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13311 rtx callarg2 ATTRIBUTE_UNUSED,
13312 rtx pop, int sibcall)
13314 rtx use = NULL, call;
13316 if (pop == const0_rtx)
13318 gcc_assert (!TARGET_64BIT || !pop);
13320 if (TARGET_MACHO && !TARGET_64BIT)
13323 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13324 fnaddr = machopic_indirect_call_target (fnaddr);
13329 /* Static functions and indirect calls don't need the pic register. */
13330 if (! TARGET_64BIT && flag_pic
13331 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13332 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13333 use_reg (&use, pic_offset_table_rtx);
13336 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13338 rtx al = gen_rtx_REG (QImode, 0);
13339 emit_move_insn (al, callarg2);
13340 use_reg (&use, al);
13343 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13345 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13346 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13348 if (sibcall && TARGET_64BIT
13349 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13352 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13353 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13354 emit_move_insn (fnaddr, addr);
13355 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13358 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13360 call = gen_rtx_SET (VOIDmode, retval, call);
13363 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13364 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13365 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13368 call = emit_call_insn (call);
13370 CALL_INSN_FUNCTION_USAGE (call) = use;
13374 /* Clear stack slot assignments remembered from previous functions.
13375 This is called from INIT_EXPANDERS once before RTL is emitted for each
13378 static struct machine_function *
13379 ix86_init_machine_status (void)
13381 struct machine_function *f;
13383 f = ggc_alloc_cleared (sizeof (struct machine_function));
13384 f->use_fast_prologue_epilogue_nregs = -1;
13385 f->tls_descriptor_call_expanded_p = 0;
13390 /* Return a MEM corresponding to a stack slot with mode MODE.
13391 Allocate a new slot if necessary.
13393 The RTL for a function can have several slots available: N is
13394 which slot to use. */
13397 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13399 struct stack_local_entry *s;
13401 gcc_assert (n < MAX_386_STACK_LOCALS);
13403 for (s = ix86_stack_locals; s; s = s->next)
13404 if (s->mode == mode && s->n == n)
13407 s = (struct stack_local_entry *)
13408 ggc_alloc (sizeof (struct stack_local_entry));
13411 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13413 s->next = ix86_stack_locals;
13414 ix86_stack_locals = s;
13418 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13420 static GTY(()) rtx ix86_tls_symbol;
13422 ix86_tls_get_addr (void)
13425 if (!ix86_tls_symbol)
13427 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13428 (TARGET_ANY_GNU_TLS
13430 ? "___tls_get_addr"
13431 : "__tls_get_addr");
13434 return ix86_tls_symbol;
13437 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13439 static GTY(()) rtx ix86_tls_module_base_symbol;
13441 ix86_tls_module_base (void)
13444 if (!ix86_tls_module_base_symbol)
13446 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13447 "_TLS_MODULE_BASE_");
13448 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13449 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13452 return ix86_tls_module_base_symbol;
13455 /* Calculate the length of the memory address in the instruction
13456 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13459 memory_address_length (rtx addr)
13461 struct ix86_address parts;
13462 rtx base, index, disp;
13466 if (GET_CODE (addr) == PRE_DEC
13467 || GET_CODE (addr) == POST_INC
13468 || GET_CODE (addr) == PRE_MODIFY
13469 || GET_CODE (addr) == POST_MODIFY)
13472 ok = ix86_decompose_address (addr, &parts);
13475 if (parts.base && GET_CODE (parts.base) == SUBREG)
13476 parts.base = SUBREG_REG (parts.base);
13477 if (parts.index && GET_CODE (parts.index) == SUBREG)
13478 parts.index = SUBREG_REG (parts.index);
13481 index = parts.index;
13486 - esp as the base always wants an index,
13487 - ebp as the base always wants a displacement. */
13489 /* Register Indirect. */
13490 if (base && !index && !disp)
13492 /* esp (for its index) and ebp (for its displacement) need
13493 the two-byte modrm form. */
13494 if (addr == stack_pointer_rtx
13495 || addr == arg_pointer_rtx
13496 || addr == frame_pointer_rtx
13497 || addr == hard_frame_pointer_rtx)
13501 /* Direct Addressing. */
13502 else if (disp && !base && !index)
13507 /* Find the length of the displacement constant. */
13510 if (base && satisfies_constraint_K (disp))
13515 /* ebp always wants a displacement. */
13516 else if (base == hard_frame_pointer_rtx)
13519 /* An index requires the two-byte modrm form.... */
13521 /* ...like esp, which always wants an index. */
13522 || base == stack_pointer_rtx
13523 || base == arg_pointer_rtx
13524 || base == frame_pointer_rtx)
13531 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13532 is set, expect that insn have 8bit immediate alternative. */
13534 ix86_attr_length_immediate_default (rtx insn, int shortform)
13538 extract_insn_cached (insn);
13539 for (i = recog_data.n_operands - 1; i >= 0; --i)
13540 if (CONSTANT_P (recog_data.operand[i]))
13543 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13547 switch (get_attr_mode (insn))
13558 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13563 fatal_insn ("unknown insn mode", insn);
13569 /* Compute default value for "length_address" attribute. */
13571 ix86_attr_length_address_default (rtx insn)
13575 if (get_attr_type (insn) == TYPE_LEA)
13577 rtx set = PATTERN (insn);
13579 if (GET_CODE (set) == PARALLEL)
13580 set = XVECEXP (set, 0, 0);
13582 gcc_assert (GET_CODE (set) == SET);
13584 return memory_address_length (SET_SRC (set));
13587 extract_insn_cached (insn);
13588 for (i = recog_data.n_operands - 1; i >= 0; --i)
13589 if (GET_CODE (recog_data.operand[i]) == MEM)
13591 return memory_address_length (XEXP (recog_data.operand[i], 0));
13597 /* Return the maximum number of instructions a cpu can issue. */
13600 ix86_issue_rate (void)
13604 case PROCESSOR_PENTIUM:
13608 case PROCESSOR_PENTIUMPRO:
13609 case PROCESSOR_PENTIUM4:
13610 case PROCESSOR_ATHLON:
13612 case PROCESSOR_NOCONA:
13613 case PROCESSOR_GENERIC32:
13614 case PROCESSOR_GENERIC64:
13622 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13623 by DEP_INSN and nothing set by DEP_INSN. */
13626 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13630 /* Simplify the test for uninteresting insns. */
13631 if (insn_type != TYPE_SETCC
13632 && insn_type != TYPE_ICMOV
13633 && insn_type != TYPE_FCMOV
13634 && insn_type != TYPE_IBR)
13637 if ((set = single_set (dep_insn)) != 0)
13639 set = SET_DEST (set);
13642 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13643 && XVECLEN (PATTERN (dep_insn), 0) == 2
13644 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13645 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13647 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13648 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13653 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13656 /* This test is true if the dependent insn reads the flags but
13657 not any other potentially set register. */
13658 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13661 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13667 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13668 address with operands set by DEP_INSN. */
13671 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13675 if (insn_type == TYPE_LEA
13678 addr = PATTERN (insn);
13680 if (GET_CODE (addr) == PARALLEL)
13681 addr = XVECEXP (addr, 0, 0);
13683 gcc_assert (GET_CODE (addr) == SET);
13685 addr = SET_SRC (addr);
13690 extract_insn_cached (insn);
13691 for (i = recog_data.n_operands - 1; i >= 0; --i)
13692 if (GET_CODE (recog_data.operand[i]) == MEM)
13694 addr = XEXP (recog_data.operand[i], 0);
13701 return modified_in_p (addr, dep_insn);
13705 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13707 enum attr_type insn_type, dep_insn_type;
13708 enum attr_memory memory;
13710 int dep_insn_code_number;
13712 /* Anti and output dependencies have zero cost on all CPUs. */
13713 if (REG_NOTE_KIND (link) != 0)
13716 dep_insn_code_number = recog_memoized (dep_insn);
13718 /* If we can't recognize the insns, we can't really do anything. */
13719 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13722 insn_type = get_attr_type (insn);
13723 dep_insn_type = get_attr_type (dep_insn);
13727 case PROCESSOR_PENTIUM:
13728 /* Address Generation Interlock adds a cycle of latency. */
13729 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13732 /* ??? Compares pair with jump/setcc. */
13733 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13736 /* Floating point stores require value to be ready one cycle earlier. */
13737 if (insn_type == TYPE_FMOV
13738 && get_attr_memory (insn) == MEMORY_STORE
13739 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13743 case PROCESSOR_PENTIUMPRO:
13744 memory = get_attr_memory (insn);
13746 /* INT->FP conversion is expensive. */
13747 if (get_attr_fp_int_src (dep_insn))
13750 /* There is one cycle extra latency between an FP op and a store. */
13751 if (insn_type == TYPE_FMOV
13752 && (set = single_set (dep_insn)) != NULL_RTX
13753 && (set2 = single_set (insn)) != NULL_RTX
13754 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13755 && GET_CODE (SET_DEST (set2)) == MEM)
13758 /* Show ability of reorder buffer to hide latency of load by executing
13759 in parallel with previous instruction in case
13760 previous instruction is not needed to compute the address. */
13761 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13762 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13764 /* Claim moves to take one cycle, as core can issue one load
13765 at time and the next load can start cycle later. */
13766 if (dep_insn_type == TYPE_IMOV
13767 || dep_insn_type == TYPE_FMOV)
13775 memory = get_attr_memory (insn);
13777 /* The esp dependency is resolved before the instruction is really
13779 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13780 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13783 /* INT->FP conversion is expensive. */
13784 if (get_attr_fp_int_src (dep_insn))
13787 /* Show ability of reorder buffer to hide latency of load by executing
13788 in parallel with previous instruction in case
13789 previous instruction is not needed to compute the address. */
13790 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13791 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13793 /* Claim moves to take one cycle, as core can issue one load
13794 at time and the next load can start cycle later. */
13795 if (dep_insn_type == TYPE_IMOV
13796 || dep_insn_type == TYPE_FMOV)
13805 case PROCESSOR_ATHLON:
13807 case PROCESSOR_GENERIC32:
13808 case PROCESSOR_GENERIC64:
13809 memory = get_attr_memory (insn);
13811 /* Show ability of reorder buffer to hide latency of load by executing
13812 in parallel with previous instruction in case
13813 previous instruction is not needed to compute the address. */
13814 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13815 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13817 enum attr_unit unit = get_attr_unit (insn);
13820 /* Because of the difference between the length of integer and
13821 floating unit pipeline preparation stages, the memory operands
13822 for floating point are cheaper.
13824 ??? For Athlon it the difference is most probably 2. */
13825 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13828 loadcost = TARGET_ATHLON ? 2 : 0;
13830 if (cost >= loadcost)
13843 /* How many alternative schedules to try. This should be as wide as the
13844 scheduling freedom in the DFA, but no wider. Making this value too
13845 large results extra work for the scheduler. */
13848 ia32_multipass_dfa_lookahead (void)
13850 if (ix86_tune == PROCESSOR_PENTIUM)
13853 if (ix86_tune == PROCESSOR_PENTIUMPRO
13854 || ix86_tune == PROCESSOR_K6)
13862 /* Compute the alignment given to a constant that is being placed in memory.
13863 EXP is the constant and ALIGN is the alignment that the object would
13865 The value of this function is used instead of that alignment to align
13869 ix86_constant_alignment (tree exp, int align)
13871 if (TREE_CODE (exp) == REAL_CST)
13873 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13875 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13878 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13879 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13880 return BITS_PER_WORD;
13885 /* Compute the alignment for a static variable.
13886 TYPE is the data type, and ALIGN is the alignment that
13887 the object would ordinarily have. The value of this function is used
13888 instead of that alignment to align the object. */
13891 ix86_data_alignment (tree type, int align)
13893 int max_align = optimize_size ? BITS_PER_WORD : 256;
13895 if (AGGREGATE_TYPE_P (type)
13896 && TYPE_SIZE (type)
13897 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13898 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13899 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13900 && align < max_align)
13903 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13904 to 16byte boundary. */
13907 if (AGGREGATE_TYPE_P (type)
13908 && TYPE_SIZE (type)
13909 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13910 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13911 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13915 if (TREE_CODE (type) == ARRAY_TYPE)
13917 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13919 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13922 else if (TREE_CODE (type) == COMPLEX_TYPE)
13925 if (TYPE_MODE (type) == DCmode && align < 64)
13927 if (TYPE_MODE (type) == XCmode && align < 128)
13930 else if ((TREE_CODE (type) == RECORD_TYPE
13931 || TREE_CODE (type) == UNION_TYPE
13932 || TREE_CODE (type) == QUAL_UNION_TYPE)
13933 && TYPE_FIELDS (type))
13935 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13937 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13940 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13941 || TREE_CODE (type) == INTEGER_TYPE)
13943 if (TYPE_MODE (type) == DFmode && align < 64)
13945 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13952 /* Compute the alignment for a local variable.
13953 TYPE is the data type, and ALIGN is the alignment that
13954 the object would ordinarily have. The value of this macro is used
13955 instead of that alignment to align the object. */
13958 ix86_local_alignment (tree type, int align)
13960 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13961 to 16byte boundary. */
13964 if (AGGREGATE_TYPE_P (type)
13965 && TYPE_SIZE (type)
13966 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13967 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13968 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13971 if (TREE_CODE (type) == ARRAY_TYPE)
13973 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13975 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13978 else if (TREE_CODE (type) == COMPLEX_TYPE)
13980 if (TYPE_MODE (type) == DCmode && align < 64)
13982 if (TYPE_MODE (type) == XCmode && align < 128)
13985 else if ((TREE_CODE (type) == RECORD_TYPE
13986 || TREE_CODE (type) == UNION_TYPE
13987 || TREE_CODE (type) == QUAL_UNION_TYPE)
13988 && TYPE_FIELDS (type))
13990 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13992 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13995 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13996 || TREE_CODE (type) == INTEGER_TYPE)
13999 if (TYPE_MODE (type) == DFmode && align < 64)
14001 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14007 /* Emit RTL insns to initialize the variable parts of a trampoline.
14008 FNADDR is an RTX for the address of the function's pure code.
14009 CXT is an RTX for the static chain value for the function. */
14011 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14015 /* Compute offset from the end of the jmp to the target function. */
14016 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14017 plus_constant (tramp, 10),
14018 NULL_RTX, 1, OPTAB_DIRECT);
14019 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14020 gen_int_mode (0xb9, QImode));
14021 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14022 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14023 gen_int_mode (0xe9, QImode));
14024 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14029 /* Try to load address using shorter movl instead of movabs.
14030 We may want to support movq for kernel mode, but kernel does not use
14031 trampolines at the moment. */
14032 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14034 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14035 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14036 gen_int_mode (0xbb41, HImode));
14037 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14038 gen_lowpart (SImode, fnaddr));
14043 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14044 gen_int_mode (0xbb49, HImode));
14045 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14049 /* Load static chain using movabs to r10. */
14050 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14051 gen_int_mode (0xba49, HImode));
14052 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14055 /* Jump to the r11 */
14056 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14057 gen_int_mode (0xff49, HImode));
14058 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14059 gen_int_mode (0xe3, QImode));
14061 gcc_assert (offset <= TRAMPOLINE_SIZE);
14064 #ifdef ENABLE_EXECUTE_STACK
14065 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14066 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14070 /* Codes for all the SSE/MMX builtins. */
14073 IX86_BUILTIN_ADDPS,
14074 IX86_BUILTIN_ADDSS,
14075 IX86_BUILTIN_DIVPS,
14076 IX86_BUILTIN_DIVSS,
14077 IX86_BUILTIN_MULPS,
14078 IX86_BUILTIN_MULSS,
14079 IX86_BUILTIN_SUBPS,
14080 IX86_BUILTIN_SUBSS,
14082 IX86_BUILTIN_CMPEQPS,
14083 IX86_BUILTIN_CMPLTPS,
14084 IX86_BUILTIN_CMPLEPS,
14085 IX86_BUILTIN_CMPGTPS,
14086 IX86_BUILTIN_CMPGEPS,
14087 IX86_BUILTIN_CMPNEQPS,
14088 IX86_BUILTIN_CMPNLTPS,
14089 IX86_BUILTIN_CMPNLEPS,
14090 IX86_BUILTIN_CMPNGTPS,
14091 IX86_BUILTIN_CMPNGEPS,
14092 IX86_BUILTIN_CMPORDPS,
14093 IX86_BUILTIN_CMPUNORDPS,
14094 IX86_BUILTIN_CMPEQSS,
14095 IX86_BUILTIN_CMPLTSS,
14096 IX86_BUILTIN_CMPLESS,
14097 IX86_BUILTIN_CMPNEQSS,
14098 IX86_BUILTIN_CMPNLTSS,
14099 IX86_BUILTIN_CMPNLESS,
14100 IX86_BUILTIN_CMPNGTSS,
14101 IX86_BUILTIN_CMPNGESS,
14102 IX86_BUILTIN_CMPORDSS,
14103 IX86_BUILTIN_CMPUNORDSS,
14105 IX86_BUILTIN_COMIEQSS,
14106 IX86_BUILTIN_COMILTSS,
14107 IX86_BUILTIN_COMILESS,
14108 IX86_BUILTIN_COMIGTSS,
14109 IX86_BUILTIN_COMIGESS,
14110 IX86_BUILTIN_COMINEQSS,
14111 IX86_BUILTIN_UCOMIEQSS,
14112 IX86_BUILTIN_UCOMILTSS,
14113 IX86_BUILTIN_UCOMILESS,
14114 IX86_BUILTIN_UCOMIGTSS,
14115 IX86_BUILTIN_UCOMIGESS,
14116 IX86_BUILTIN_UCOMINEQSS,
14118 IX86_BUILTIN_CVTPI2PS,
14119 IX86_BUILTIN_CVTPS2PI,
14120 IX86_BUILTIN_CVTSI2SS,
14121 IX86_BUILTIN_CVTSI642SS,
14122 IX86_BUILTIN_CVTSS2SI,
14123 IX86_BUILTIN_CVTSS2SI64,
14124 IX86_BUILTIN_CVTTPS2PI,
14125 IX86_BUILTIN_CVTTSS2SI,
14126 IX86_BUILTIN_CVTTSS2SI64,
14128 IX86_BUILTIN_MAXPS,
14129 IX86_BUILTIN_MAXSS,
14130 IX86_BUILTIN_MINPS,
14131 IX86_BUILTIN_MINSS,
14133 IX86_BUILTIN_LOADUPS,
14134 IX86_BUILTIN_STOREUPS,
14135 IX86_BUILTIN_MOVSS,
14137 IX86_BUILTIN_MOVHLPS,
14138 IX86_BUILTIN_MOVLHPS,
14139 IX86_BUILTIN_LOADHPS,
14140 IX86_BUILTIN_LOADLPS,
14141 IX86_BUILTIN_STOREHPS,
14142 IX86_BUILTIN_STORELPS,
14144 IX86_BUILTIN_MASKMOVQ,
14145 IX86_BUILTIN_MOVMSKPS,
14146 IX86_BUILTIN_PMOVMSKB,
14148 IX86_BUILTIN_MOVNTPS,
14149 IX86_BUILTIN_MOVNTQ,
14151 IX86_BUILTIN_LOADDQU,
14152 IX86_BUILTIN_STOREDQU,
14154 IX86_BUILTIN_PACKSSWB,
14155 IX86_BUILTIN_PACKSSDW,
14156 IX86_BUILTIN_PACKUSWB,
14158 IX86_BUILTIN_PADDB,
14159 IX86_BUILTIN_PADDW,
14160 IX86_BUILTIN_PADDD,
14161 IX86_BUILTIN_PADDQ,
14162 IX86_BUILTIN_PADDSB,
14163 IX86_BUILTIN_PADDSW,
14164 IX86_BUILTIN_PADDUSB,
14165 IX86_BUILTIN_PADDUSW,
14166 IX86_BUILTIN_PSUBB,
14167 IX86_BUILTIN_PSUBW,
14168 IX86_BUILTIN_PSUBD,
14169 IX86_BUILTIN_PSUBQ,
14170 IX86_BUILTIN_PSUBSB,
14171 IX86_BUILTIN_PSUBSW,
14172 IX86_BUILTIN_PSUBUSB,
14173 IX86_BUILTIN_PSUBUSW,
14176 IX86_BUILTIN_PANDN,
14180 IX86_BUILTIN_PAVGB,
14181 IX86_BUILTIN_PAVGW,
14183 IX86_BUILTIN_PCMPEQB,
14184 IX86_BUILTIN_PCMPEQW,
14185 IX86_BUILTIN_PCMPEQD,
14186 IX86_BUILTIN_PCMPGTB,
14187 IX86_BUILTIN_PCMPGTW,
14188 IX86_BUILTIN_PCMPGTD,
14190 IX86_BUILTIN_PMADDWD,
14192 IX86_BUILTIN_PMAXSW,
14193 IX86_BUILTIN_PMAXUB,
14194 IX86_BUILTIN_PMINSW,
14195 IX86_BUILTIN_PMINUB,
14197 IX86_BUILTIN_PMULHUW,
14198 IX86_BUILTIN_PMULHW,
14199 IX86_BUILTIN_PMULLW,
14201 IX86_BUILTIN_PSADBW,
14202 IX86_BUILTIN_PSHUFW,
14204 IX86_BUILTIN_PSLLW,
14205 IX86_BUILTIN_PSLLD,
14206 IX86_BUILTIN_PSLLQ,
14207 IX86_BUILTIN_PSRAW,
14208 IX86_BUILTIN_PSRAD,
14209 IX86_BUILTIN_PSRLW,
14210 IX86_BUILTIN_PSRLD,
14211 IX86_BUILTIN_PSRLQ,
14212 IX86_BUILTIN_PSLLWI,
14213 IX86_BUILTIN_PSLLDI,
14214 IX86_BUILTIN_PSLLQI,
14215 IX86_BUILTIN_PSRAWI,
14216 IX86_BUILTIN_PSRADI,
14217 IX86_BUILTIN_PSRLWI,
14218 IX86_BUILTIN_PSRLDI,
14219 IX86_BUILTIN_PSRLQI,
14221 IX86_BUILTIN_PUNPCKHBW,
14222 IX86_BUILTIN_PUNPCKHWD,
14223 IX86_BUILTIN_PUNPCKHDQ,
14224 IX86_BUILTIN_PUNPCKLBW,
14225 IX86_BUILTIN_PUNPCKLWD,
14226 IX86_BUILTIN_PUNPCKLDQ,
14228 IX86_BUILTIN_SHUFPS,
14230 IX86_BUILTIN_RCPPS,
14231 IX86_BUILTIN_RCPSS,
14232 IX86_BUILTIN_RSQRTPS,
14233 IX86_BUILTIN_RSQRTSS,
14234 IX86_BUILTIN_SQRTPS,
14235 IX86_BUILTIN_SQRTSS,
14237 IX86_BUILTIN_UNPCKHPS,
14238 IX86_BUILTIN_UNPCKLPS,
14240 IX86_BUILTIN_ANDPS,
14241 IX86_BUILTIN_ANDNPS,
14243 IX86_BUILTIN_XORPS,
14246 IX86_BUILTIN_LDMXCSR,
14247 IX86_BUILTIN_STMXCSR,
14248 IX86_BUILTIN_SFENCE,
14250 /* 3DNow! Original */
14251 IX86_BUILTIN_FEMMS,
14252 IX86_BUILTIN_PAVGUSB,
14253 IX86_BUILTIN_PF2ID,
14254 IX86_BUILTIN_PFACC,
14255 IX86_BUILTIN_PFADD,
14256 IX86_BUILTIN_PFCMPEQ,
14257 IX86_BUILTIN_PFCMPGE,
14258 IX86_BUILTIN_PFCMPGT,
14259 IX86_BUILTIN_PFMAX,
14260 IX86_BUILTIN_PFMIN,
14261 IX86_BUILTIN_PFMUL,
14262 IX86_BUILTIN_PFRCP,
14263 IX86_BUILTIN_PFRCPIT1,
14264 IX86_BUILTIN_PFRCPIT2,
14265 IX86_BUILTIN_PFRSQIT1,
14266 IX86_BUILTIN_PFRSQRT,
14267 IX86_BUILTIN_PFSUB,
14268 IX86_BUILTIN_PFSUBR,
14269 IX86_BUILTIN_PI2FD,
14270 IX86_BUILTIN_PMULHRW,
14272 /* 3DNow! Athlon Extensions */
14273 IX86_BUILTIN_PF2IW,
14274 IX86_BUILTIN_PFNACC,
14275 IX86_BUILTIN_PFPNACC,
14276 IX86_BUILTIN_PI2FW,
14277 IX86_BUILTIN_PSWAPDSI,
14278 IX86_BUILTIN_PSWAPDSF,
14281 IX86_BUILTIN_ADDPD,
14282 IX86_BUILTIN_ADDSD,
14283 IX86_BUILTIN_DIVPD,
14284 IX86_BUILTIN_DIVSD,
14285 IX86_BUILTIN_MULPD,
14286 IX86_BUILTIN_MULSD,
14287 IX86_BUILTIN_SUBPD,
14288 IX86_BUILTIN_SUBSD,
14290 IX86_BUILTIN_CMPEQPD,
14291 IX86_BUILTIN_CMPLTPD,
14292 IX86_BUILTIN_CMPLEPD,
14293 IX86_BUILTIN_CMPGTPD,
14294 IX86_BUILTIN_CMPGEPD,
14295 IX86_BUILTIN_CMPNEQPD,
14296 IX86_BUILTIN_CMPNLTPD,
14297 IX86_BUILTIN_CMPNLEPD,
14298 IX86_BUILTIN_CMPNGTPD,
14299 IX86_BUILTIN_CMPNGEPD,
14300 IX86_BUILTIN_CMPORDPD,
14301 IX86_BUILTIN_CMPUNORDPD,
14302 IX86_BUILTIN_CMPNEPD,
14303 IX86_BUILTIN_CMPEQSD,
14304 IX86_BUILTIN_CMPLTSD,
14305 IX86_BUILTIN_CMPLESD,
14306 IX86_BUILTIN_CMPNEQSD,
14307 IX86_BUILTIN_CMPNLTSD,
14308 IX86_BUILTIN_CMPNLESD,
14309 IX86_BUILTIN_CMPORDSD,
14310 IX86_BUILTIN_CMPUNORDSD,
14311 IX86_BUILTIN_CMPNESD,
14313 IX86_BUILTIN_COMIEQSD,
14314 IX86_BUILTIN_COMILTSD,
14315 IX86_BUILTIN_COMILESD,
14316 IX86_BUILTIN_COMIGTSD,
14317 IX86_BUILTIN_COMIGESD,
14318 IX86_BUILTIN_COMINEQSD,
14319 IX86_BUILTIN_UCOMIEQSD,
14320 IX86_BUILTIN_UCOMILTSD,
14321 IX86_BUILTIN_UCOMILESD,
14322 IX86_BUILTIN_UCOMIGTSD,
14323 IX86_BUILTIN_UCOMIGESD,
14324 IX86_BUILTIN_UCOMINEQSD,
14326 IX86_BUILTIN_MAXPD,
14327 IX86_BUILTIN_MAXSD,
14328 IX86_BUILTIN_MINPD,
14329 IX86_BUILTIN_MINSD,
14331 IX86_BUILTIN_ANDPD,
14332 IX86_BUILTIN_ANDNPD,
14334 IX86_BUILTIN_XORPD,
14336 IX86_BUILTIN_SQRTPD,
14337 IX86_BUILTIN_SQRTSD,
14339 IX86_BUILTIN_UNPCKHPD,
14340 IX86_BUILTIN_UNPCKLPD,
14342 IX86_BUILTIN_SHUFPD,
14344 IX86_BUILTIN_LOADUPD,
14345 IX86_BUILTIN_STOREUPD,
14346 IX86_BUILTIN_MOVSD,
14348 IX86_BUILTIN_LOADHPD,
14349 IX86_BUILTIN_LOADLPD,
14351 IX86_BUILTIN_CVTDQ2PD,
14352 IX86_BUILTIN_CVTDQ2PS,
14354 IX86_BUILTIN_CVTPD2DQ,
14355 IX86_BUILTIN_CVTPD2PI,
14356 IX86_BUILTIN_CVTPD2PS,
14357 IX86_BUILTIN_CVTTPD2DQ,
14358 IX86_BUILTIN_CVTTPD2PI,
14360 IX86_BUILTIN_CVTPI2PD,
14361 IX86_BUILTIN_CVTSI2SD,
14362 IX86_BUILTIN_CVTSI642SD,
14364 IX86_BUILTIN_CVTSD2SI,
14365 IX86_BUILTIN_CVTSD2SI64,
14366 IX86_BUILTIN_CVTSD2SS,
14367 IX86_BUILTIN_CVTSS2SD,
14368 IX86_BUILTIN_CVTTSD2SI,
14369 IX86_BUILTIN_CVTTSD2SI64,
14371 IX86_BUILTIN_CVTPS2DQ,
14372 IX86_BUILTIN_CVTPS2PD,
14373 IX86_BUILTIN_CVTTPS2DQ,
14375 IX86_BUILTIN_MOVNTI,
14376 IX86_BUILTIN_MOVNTPD,
14377 IX86_BUILTIN_MOVNTDQ,
14380 IX86_BUILTIN_MASKMOVDQU,
14381 IX86_BUILTIN_MOVMSKPD,
14382 IX86_BUILTIN_PMOVMSKB128,
14384 IX86_BUILTIN_PACKSSWB128,
14385 IX86_BUILTIN_PACKSSDW128,
14386 IX86_BUILTIN_PACKUSWB128,
14388 IX86_BUILTIN_PADDB128,
14389 IX86_BUILTIN_PADDW128,
14390 IX86_BUILTIN_PADDD128,
14391 IX86_BUILTIN_PADDQ128,
14392 IX86_BUILTIN_PADDSB128,
14393 IX86_BUILTIN_PADDSW128,
14394 IX86_BUILTIN_PADDUSB128,
14395 IX86_BUILTIN_PADDUSW128,
14396 IX86_BUILTIN_PSUBB128,
14397 IX86_BUILTIN_PSUBW128,
14398 IX86_BUILTIN_PSUBD128,
14399 IX86_BUILTIN_PSUBQ128,
14400 IX86_BUILTIN_PSUBSB128,
14401 IX86_BUILTIN_PSUBSW128,
14402 IX86_BUILTIN_PSUBUSB128,
14403 IX86_BUILTIN_PSUBUSW128,
14405 IX86_BUILTIN_PAND128,
14406 IX86_BUILTIN_PANDN128,
14407 IX86_BUILTIN_POR128,
14408 IX86_BUILTIN_PXOR128,
14410 IX86_BUILTIN_PAVGB128,
14411 IX86_BUILTIN_PAVGW128,
14413 IX86_BUILTIN_PCMPEQB128,
14414 IX86_BUILTIN_PCMPEQW128,
14415 IX86_BUILTIN_PCMPEQD128,
14416 IX86_BUILTIN_PCMPGTB128,
14417 IX86_BUILTIN_PCMPGTW128,
14418 IX86_BUILTIN_PCMPGTD128,
14420 IX86_BUILTIN_PMADDWD128,
14422 IX86_BUILTIN_PMAXSW128,
14423 IX86_BUILTIN_PMAXUB128,
14424 IX86_BUILTIN_PMINSW128,
14425 IX86_BUILTIN_PMINUB128,
14427 IX86_BUILTIN_PMULUDQ,
14428 IX86_BUILTIN_PMULUDQ128,
14429 IX86_BUILTIN_PMULHUW128,
14430 IX86_BUILTIN_PMULHW128,
14431 IX86_BUILTIN_PMULLW128,
14433 IX86_BUILTIN_PSADBW128,
14434 IX86_BUILTIN_PSHUFHW,
14435 IX86_BUILTIN_PSHUFLW,
14436 IX86_BUILTIN_PSHUFD,
14438 IX86_BUILTIN_PSLLW128,
14439 IX86_BUILTIN_PSLLD128,
14440 IX86_BUILTIN_PSLLQ128,
14441 IX86_BUILTIN_PSRAW128,
14442 IX86_BUILTIN_PSRAD128,
14443 IX86_BUILTIN_PSRLW128,
14444 IX86_BUILTIN_PSRLD128,
14445 IX86_BUILTIN_PSRLQ128,
14446 IX86_BUILTIN_PSLLDQI128,
14447 IX86_BUILTIN_PSLLWI128,
14448 IX86_BUILTIN_PSLLDI128,
14449 IX86_BUILTIN_PSLLQI128,
14450 IX86_BUILTIN_PSRAWI128,
14451 IX86_BUILTIN_PSRADI128,
14452 IX86_BUILTIN_PSRLDQI128,
14453 IX86_BUILTIN_PSRLWI128,
14454 IX86_BUILTIN_PSRLDI128,
14455 IX86_BUILTIN_PSRLQI128,
14457 IX86_BUILTIN_PUNPCKHBW128,
14458 IX86_BUILTIN_PUNPCKHWD128,
14459 IX86_BUILTIN_PUNPCKHDQ128,
14460 IX86_BUILTIN_PUNPCKHQDQ128,
14461 IX86_BUILTIN_PUNPCKLBW128,
14462 IX86_BUILTIN_PUNPCKLWD128,
14463 IX86_BUILTIN_PUNPCKLDQ128,
14464 IX86_BUILTIN_PUNPCKLQDQ128,
14466 IX86_BUILTIN_CLFLUSH,
14467 IX86_BUILTIN_MFENCE,
14468 IX86_BUILTIN_LFENCE,
14470 /* Prescott New Instructions. */
14471 IX86_BUILTIN_ADDSUBPS,
14472 IX86_BUILTIN_HADDPS,
14473 IX86_BUILTIN_HSUBPS,
14474 IX86_BUILTIN_MOVSHDUP,
14475 IX86_BUILTIN_MOVSLDUP,
14476 IX86_BUILTIN_ADDSUBPD,
14477 IX86_BUILTIN_HADDPD,
14478 IX86_BUILTIN_HSUBPD,
14479 IX86_BUILTIN_LDDQU,
14481 IX86_BUILTIN_MONITOR,
14482 IX86_BUILTIN_MWAIT,
14485 IX86_BUILTIN_PHADDW,
14486 IX86_BUILTIN_PHADDD,
14487 IX86_BUILTIN_PHADDSW,
14488 IX86_BUILTIN_PHSUBW,
14489 IX86_BUILTIN_PHSUBD,
14490 IX86_BUILTIN_PHSUBSW,
14491 IX86_BUILTIN_PMADDUBSW,
14492 IX86_BUILTIN_PMULHRSW,
14493 IX86_BUILTIN_PSHUFB,
14494 IX86_BUILTIN_PSIGNB,
14495 IX86_BUILTIN_PSIGNW,
14496 IX86_BUILTIN_PSIGND,
14497 IX86_BUILTIN_PALIGNR,
14498 IX86_BUILTIN_PABSB,
14499 IX86_BUILTIN_PABSW,
14500 IX86_BUILTIN_PABSD,
14502 IX86_BUILTIN_PHADDW128,
14503 IX86_BUILTIN_PHADDD128,
14504 IX86_BUILTIN_PHADDSW128,
14505 IX86_BUILTIN_PHSUBW128,
14506 IX86_BUILTIN_PHSUBD128,
14507 IX86_BUILTIN_PHSUBSW128,
14508 IX86_BUILTIN_PMADDUBSW128,
14509 IX86_BUILTIN_PMULHRSW128,
14510 IX86_BUILTIN_PSHUFB128,
14511 IX86_BUILTIN_PSIGNB128,
14512 IX86_BUILTIN_PSIGNW128,
14513 IX86_BUILTIN_PSIGND128,
14514 IX86_BUILTIN_PALIGNR128,
14515 IX86_BUILTIN_PABSB128,
14516 IX86_BUILTIN_PABSW128,
14517 IX86_BUILTIN_PABSD128,
14519 IX86_BUILTIN_VEC_INIT_V2SI,
14520 IX86_BUILTIN_VEC_INIT_V4HI,
14521 IX86_BUILTIN_VEC_INIT_V8QI,
14522 IX86_BUILTIN_VEC_EXT_V2DF,
14523 IX86_BUILTIN_VEC_EXT_V2DI,
14524 IX86_BUILTIN_VEC_EXT_V4SF,
14525 IX86_BUILTIN_VEC_EXT_V4SI,
14526 IX86_BUILTIN_VEC_EXT_V8HI,
14527 IX86_BUILTIN_VEC_EXT_V2SI,
14528 IX86_BUILTIN_VEC_EXT_V4HI,
14529 IX86_BUILTIN_VEC_SET_V8HI,
14530 IX86_BUILTIN_VEC_SET_V4HI,
14535 #define def_builtin(MASK, NAME, TYPE, CODE) \
14537 if ((MASK) & target_flags \
14538 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14539 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14540 NULL, NULL_TREE); \
14543 /* Bits for builtin_description.flag. */
14545 /* Set when we don't support the comparison natively, and should
14546 swap_comparison in order to support it. */
14547 #define BUILTIN_DESC_SWAP_OPERANDS 1
14549 struct builtin_description
14551 const unsigned int mask;
14552 const enum insn_code icode;
14553 const char *const name;
14554 const enum ix86_builtins code;
14555 const enum rtx_code comparison;
14556 const unsigned int flag;
14559 static const struct builtin_description bdesc_comi[] =
14561 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14562 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14563 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14564 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14565 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14566 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14567 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14568 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14569 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14570 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14571 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14572 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14573 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14574 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14575 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14576 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14577 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14578 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14579 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14580 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14581 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14582 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14583 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14584 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14587 static const struct builtin_description bdesc_2arg[] =
14590 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14591 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14592 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14593 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14594 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14595 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14596 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14597 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14599 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14600 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14601 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14602 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14603 BUILTIN_DESC_SWAP_OPERANDS },
14604 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14605 BUILTIN_DESC_SWAP_OPERANDS },
14606 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14607 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14608 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14609 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14610 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14611 BUILTIN_DESC_SWAP_OPERANDS },
14612 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14613 BUILTIN_DESC_SWAP_OPERANDS },
14614 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14615 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14616 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14617 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14618 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14619 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14620 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14621 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14622 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14623 BUILTIN_DESC_SWAP_OPERANDS },
14624 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14625 BUILTIN_DESC_SWAP_OPERANDS },
14626 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14628 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14629 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14630 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14631 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14633 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14634 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14635 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14636 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14638 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14639 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14640 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14641 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14642 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14645 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14646 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14647 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14648 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14649 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14650 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14651 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14652 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14654 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14655 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14656 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14657 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14658 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14659 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14660 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14661 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14663 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14664 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14665 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14667 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14668 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14669 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14670 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14672 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14673 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14675 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14676 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14677 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14678 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14679 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14680 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14682 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14683 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14684 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14685 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14687 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14688 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14689 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14690 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14691 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14692 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14695 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14696 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14697 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14699 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14700 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14701 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14703 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14704 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14705 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14706 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14707 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14708 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14710 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14711 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14712 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14713 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14714 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14715 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14717 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14718 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14719 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14720 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14722 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14723 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14726 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14727 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14728 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14729 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14730 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14731 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14732 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14733 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14735 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14736 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14737 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14738 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14739 BUILTIN_DESC_SWAP_OPERANDS },
14740 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14741 BUILTIN_DESC_SWAP_OPERANDS },
14742 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14743 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14744 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14745 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14746 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14747 BUILTIN_DESC_SWAP_OPERANDS },
14748 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14749 BUILTIN_DESC_SWAP_OPERANDS },
14750 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14751 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14752 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14753 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14754 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14755 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14756 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14757 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14758 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14760 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14761 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14762 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14763 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14765 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14766 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14767 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14768 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14770 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14771 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14772 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14775 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14776 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14777 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14778 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14779 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14780 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14781 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14782 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14784 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14785 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14786 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14787 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14788 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14789 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14790 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14791 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14793 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14794 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14796 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14797 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14798 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14799 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14801 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14802 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14804 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14805 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14806 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14807 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14808 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14809 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14811 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14812 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14813 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14814 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14816 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14817 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14818 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14819 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14820 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14821 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14822 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14823 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14825 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14826 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14827 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14829 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14830 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14832 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14833 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14835 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14836 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14837 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14839 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14840 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14841 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14843 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14844 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14846 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14848 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14849 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14850 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14851 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14854 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14855 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14856 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14857 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14858 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14859 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
14862 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
14863 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
14864 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
14865 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
14866 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
14867 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
14868 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
14869 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
14870 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
14871 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
14872 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
14873 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
14874 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
14875 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
14876 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
14877 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
14878 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
14879 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
14880 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
14881 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
14882 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
14883 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
14884 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
14885 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
14888 static const struct builtin_description bdesc_1arg[] =
14890 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14891 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14893 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14894 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14895 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14897 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14898 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14899 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14900 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14901 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14902 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14904 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14905 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14907 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14909 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14910 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14912 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14913 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14914 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14915 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14916 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14918 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14920 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14921 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14922 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14923 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14925 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14926 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14927 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14930 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14931 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14934 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
14935 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
14936 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
14937 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
14938 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
14939 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
14943 ix86_init_builtins (void)
14946 ix86_init_mmx_sse_builtins ();
14949 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14950 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
14953 ix86_init_mmx_sse_builtins (void)
14955 const struct builtin_description * d;
14958 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14959 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14960 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14961 tree V2DI_type_node
14962 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14963 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14964 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14965 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14966 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14967 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14968 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14970 tree pchar_type_node = build_pointer_type (char_type_node);
14971 tree pcchar_type_node = build_pointer_type (
14972 build_type_variant (char_type_node, 1, 0));
14973 tree pfloat_type_node = build_pointer_type (float_type_node);
14974 tree pcfloat_type_node = build_pointer_type (
14975 build_type_variant (float_type_node, 1, 0));
14976 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14977 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14978 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14981 tree int_ftype_v4sf_v4sf
14982 = build_function_type_list (integer_type_node,
14983 V4SF_type_node, V4SF_type_node, NULL_TREE);
14984 tree v4si_ftype_v4sf_v4sf
14985 = build_function_type_list (V4SI_type_node,
14986 V4SF_type_node, V4SF_type_node, NULL_TREE);
14987 /* MMX/SSE/integer conversions. */
14988 tree int_ftype_v4sf
14989 = build_function_type_list (integer_type_node,
14990 V4SF_type_node, NULL_TREE);
14991 tree int64_ftype_v4sf
14992 = build_function_type_list (long_long_integer_type_node,
14993 V4SF_type_node, NULL_TREE);
14994 tree int_ftype_v8qi
14995 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14996 tree v4sf_ftype_v4sf_int
14997 = build_function_type_list (V4SF_type_node,
14998 V4SF_type_node, integer_type_node, NULL_TREE);
14999 tree v4sf_ftype_v4sf_int64
15000 = build_function_type_list (V4SF_type_node,
15001 V4SF_type_node, long_long_integer_type_node,
15003 tree v4sf_ftype_v4sf_v2si
15004 = build_function_type_list (V4SF_type_node,
15005 V4SF_type_node, V2SI_type_node, NULL_TREE);
15007 /* Miscellaneous. */
15008 tree v8qi_ftype_v4hi_v4hi
15009 = build_function_type_list (V8QI_type_node,
15010 V4HI_type_node, V4HI_type_node, NULL_TREE);
15011 tree v4hi_ftype_v2si_v2si
15012 = build_function_type_list (V4HI_type_node,
15013 V2SI_type_node, V2SI_type_node, NULL_TREE);
15014 tree v4sf_ftype_v4sf_v4sf_int
15015 = build_function_type_list (V4SF_type_node,
15016 V4SF_type_node, V4SF_type_node,
15017 integer_type_node, NULL_TREE);
15018 tree v2si_ftype_v4hi_v4hi
15019 = build_function_type_list (V2SI_type_node,
15020 V4HI_type_node, V4HI_type_node, NULL_TREE);
15021 tree v4hi_ftype_v4hi_int
15022 = build_function_type_list (V4HI_type_node,
15023 V4HI_type_node, integer_type_node, NULL_TREE);
15024 tree v4hi_ftype_v4hi_di
15025 = build_function_type_list (V4HI_type_node,
15026 V4HI_type_node, long_long_unsigned_type_node,
15028 tree v2si_ftype_v2si_di
15029 = build_function_type_list (V2SI_type_node,
15030 V2SI_type_node, long_long_unsigned_type_node,
15032 tree void_ftype_void
15033 = build_function_type (void_type_node, void_list_node);
15034 tree void_ftype_unsigned
15035 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15036 tree void_ftype_unsigned_unsigned
15037 = build_function_type_list (void_type_node, unsigned_type_node,
15038 unsigned_type_node, NULL_TREE);
15039 tree void_ftype_pcvoid_unsigned_unsigned
15040 = build_function_type_list (void_type_node, const_ptr_type_node,
15041 unsigned_type_node, unsigned_type_node,
15043 tree unsigned_ftype_void
15044 = build_function_type (unsigned_type_node, void_list_node);
15045 tree v2si_ftype_v4sf
15046 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15047 /* Loads/stores. */
15048 tree void_ftype_v8qi_v8qi_pchar
15049 = build_function_type_list (void_type_node,
15050 V8QI_type_node, V8QI_type_node,
15051 pchar_type_node, NULL_TREE);
15052 tree v4sf_ftype_pcfloat
15053 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15054 /* @@@ the type is bogus */
15055 tree v4sf_ftype_v4sf_pv2si
15056 = build_function_type_list (V4SF_type_node,
15057 V4SF_type_node, pv2si_type_node, NULL_TREE);
15058 tree void_ftype_pv2si_v4sf
15059 = build_function_type_list (void_type_node,
15060 pv2si_type_node, V4SF_type_node, NULL_TREE);
15061 tree void_ftype_pfloat_v4sf
15062 = build_function_type_list (void_type_node,
15063 pfloat_type_node, V4SF_type_node, NULL_TREE);
15064 tree void_ftype_pdi_di
15065 = build_function_type_list (void_type_node,
15066 pdi_type_node, long_long_unsigned_type_node,
15068 tree void_ftype_pv2di_v2di
15069 = build_function_type_list (void_type_node,
15070 pv2di_type_node, V2DI_type_node, NULL_TREE);
15071 /* Normal vector unops. */
15072 tree v4sf_ftype_v4sf
15073 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15074 tree v16qi_ftype_v16qi
15075 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15076 tree v8hi_ftype_v8hi
15077 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15078 tree v4si_ftype_v4si
15079 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15080 tree v8qi_ftype_v8qi
15081 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15082 tree v4hi_ftype_v4hi
15083 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15085 /* Normal vector binops. */
15086 tree v4sf_ftype_v4sf_v4sf
15087 = build_function_type_list (V4SF_type_node,
15088 V4SF_type_node, V4SF_type_node, NULL_TREE);
15089 tree v8qi_ftype_v8qi_v8qi
15090 = build_function_type_list (V8QI_type_node,
15091 V8QI_type_node, V8QI_type_node, NULL_TREE);
15092 tree v4hi_ftype_v4hi_v4hi
15093 = build_function_type_list (V4HI_type_node,
15094 V4HI_type_node, V4HI_type_node, NULL_TREE);
15095 tree v2si_ftype_v2si_v2si
15096 = build_function_type_list (V2SI_type_node,
15097 V2SI_type_node, V2SI_type_node, NULL_TREE);
15098 tree di_ftype_di_di
15099 = build_function_type_list (long_long_unsigned_type_node,
15100 long_long_unsigned_type_node,
15101 long_long_unsigned_type_node, NULL_TREE);
15103 tree di_ftype_di_di_int
15104 = build_function_type_list (long_long_unsigned_type_node,
15105 long_long_unsigned_type_node,
15106 long_long_unsigned_type_node,
15107 integer_type_node, NULL_TREE);
15109 tree v2si_ftype_v2sf
15110 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15111 tree v2sf_ftype_v2si
15112 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15113 tree v2si_ftype_v2si
15114 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15115 tree v2sf_ftype_v2sf
15116 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15117 tree v2sf_ftype_v2sf_v2sf
15118 = build_function_type_list (V2SF_type_node,
15119 V2SF_type_node, V2SF_type_node, NULL_TREE);
15120 tree v2si_ftype_v2sf_v2sf
15121 = build_function_type_list (V2SI_type_node,
15122 V2SF_type_node, V2SF_type_node, NULL_TREE);
15123 tree pint_type_node = build_pointer_type (integer_type_node);
15124 tree pdouble_type_node = build_pointer_type (double_type_node);
15125 tree pcdouble_type_node = build_pointer_type (
15126 build_type_variant (double_type_node, 1, 0));
15127 tree int_ftype_v2df_v2df
15128 = build_function_type_list (integer_type_node,
15129 V2DF_type_node, V2DF_type_node, NULL_TREE);
15131 tree void_ftype_pcvoid
15132 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15133 tree v4sf_ftype_v4si
15134 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15135 tree v4si_ftype_v4sf
15136 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15137 tree v2df_ftype_v4si
15138 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15139 tree v4si_ftype_v2df
15140 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15141 tree v2si_ftype_v2df
15142 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15143 tree v4sf_ftype_v2df
15144 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15145 tree v2df_ftype_v2si
15146 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15147 tree v2df_ftype_v4sf
15148 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15149 tree int_ftype_v2df
15150 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15151 tree int64_ftype_v2df
15152 = build_function_type_list (long_long_integer_type_node,
15153 V2DF_type_node, NULL_TREE);
15154 tree v2df_ftype_v2df_int
15155 = build_function_type_list (V2DF_type_node,
15156 V2DF_type_node, integer_type_node, NULL_TREE);
15157 tree v2df_ftype_v2df_int64
15158 = build_function_type_list (V2DF_type_node,
15159 V2DF_type_node, long_long_integer_type_node,
15161 tree v4sf_ftype_v4sf_v2df
15162 = build_function_type_list (V4SF_type_node,
15163 V4SF_type_node, V2DF_type_node, NULL_TREE);
15164 tree v2df_ftype_v2df_v4sf
15165 = build_function_type_list (V2DF_type_node,
15166 V2DF_type_node, V4SF_type_node, NULL_TREE);
15167 tree v2df_ftype_v2df_v2df_int
15168 = build_function_type_list (V2DF_type_node,
15169 V2DF_type_node, V2DF_type_node,
15172 tree v2df_ftype_v2df_pcdouble
15173 = build_function_type_list (V2DF_type_node,
15174 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15175 tree void_ftype_pdouble_v2df
15176 = build_function_type_list (void_type_node,
15177 pdouble_type_node, V2DF_type_node, NULL_TREE);
15178 tree void_ftype_pint_int
15179 = build_function_type_list (void_type_node,
15180 pint_type_node, integer_type_node, NULL_TREE);
15181 tree void_ftype_v16qi_v16qi_pchar
15182 = build_function_type_list (void_type_node,
15183 V16QI_type_node, V16QI_type_node,
15184 pchar_type_node, NULL_TREE);
15185 tree v2df_ftype_pcdouble
15186 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15187 tree v2df_ftype_v2df_v2df
15188 = build_function_type_list (V2DF_type_node,
15189 V2DF_type_node, V2DF_type_node, NULL_TREE);
15190 tree v16qi_ftype_v16qi_v16qi
15191 = build_function_type_list (V16QI_type_node,
15192 V16QI_type_node, V16QI_type_node, NULL_TREE);
15193 tree v8hi_ftype_v8hi_v8hi
15194 = build_function_type_list (V8HI_type_node,
15195 V8HI_type_node, V8HI_type_node, NULL_TREE);
15196 tree v4si_ftype_v4si_v4si
15197 = build_function_type_list (V4SI_type_node,
15198 V4SI_type_node, V4SI_type_node, NULL_TREE);
15199 tree v2di_ftype_v2di_v2di
15200 = build_function_type_list (V2DI_type_node,
15201 V2DI_type_node, V2DI_type_node, NULL_TREE);
15202 tree v2di_ftype_v2df_v2df
15203 = build_function_type_list (V2DI_type_node,
15204 V2DF_type_node, V2DF_type_node, NULL_TREE);
15205 tree v2df_ftype_v2df
15206 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15207 tree v2di_ftype_v2di_int
15208 = build_function_type_list (V2DI_type_node,
15209 V2DI_type_node, integer_type_node, NULL_TREE);
15210 tree v2di_ftype_v2di_v2di_int
15211 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15212 V2DI_type_node, integer_type_node, NULL_TREE);
15213 tree v4si_ftype_v4si_int
15214 = build_function_type_list (V4SI_type_node,
15215 V4SI_type_node, integer_type_node, NULL_TREE);
15216 tree v8hi_ftype_v8hi_int
15217 = build_function_type_list (V8HI_type_node,
15218 V8HI_type_node, integer_type_node, NULL_TREE);
15219 tree v8hi_ftype_v8hi_v2di
15220 = build_function_type_list (V8HI_type_node,
15221 V8HI_type_node, V2DI_type_node, NULL_TREE);
15222 tree v4si_ftype_v4si_v2di
15223 = build_function_type_list (V4SI_type_node,
15224 V4SI_type_node, V2DI_type_node, NULL_TREE);
15225 tree v4si_ftype_v8hi_v8hi
15226 = build_function_type_list (V4SI_type_node,
15227 V8HI_type_node, V8HI_type_node, NULL_TREE);
15228 tree di_ftype_v8qi_v8qi
15229 = build_function_type_list (long_long_unsigned_type_node,
15230 V8QI_type_node, V8QI_type_node, NULL_TREE);
15231 tree di_ftype_v2si_v2si
15232 = build_function_type_list (long_long_unsigned_type_node,
15233 V2SI_type_node, V2SI_type_node, NULL_TREE);
15234 tree v2di_ftype_v16qi_v16qi
15235 = build_function_type_list (V2DI_type_node,
15236 V16QI_type_node, V16QI_type_node, NULL_TREE);
15237 tree v2di_ftype_v4si_v4si
15238 = build_function_type_list (V2DI_type_node,
15239 V4SI_type_node, V4SI_type_node, NULL_TREE);
15240 tree int_ftype_v16qi
15241 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15242 tree v16qi_ftype_pcchar
15243 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15244 tree void_ftype_pchar_v16qi
15245 = build_function_type_list (void_type_node,
15246 pchar_type_node, V16QI_type_node, NULL_TREE);
15249 tree float128_type;
15252 /* The __float80 type. */
15253 if (TYPE_MODE (long_double_type_node) == XFmode)
15254 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15258 /* The __float80 type. */
15259 float80_type = make_node (REAL_TYPE);
15260 TYPE_PRECISION (float80_type) = 80;
15261 layout_type (float80_type);
15262 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15267 float128_type = make_node (REAL_TYPE);
15268 TYPE_PRECISION (float128_type) = 128;
15269 layout_type (float128_type);
15270 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15273 /* Add all builtins that are more or less simple operations on two
15275 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15277 /* Use one of the operands; the target can have a different mode for
15278 mask-generating compares. */
15279 enum machine_mode mode;
15284 mode = insn_data[d->icode].operand[1].mode;
15289 type = v16qi_ftype_v16qi_v16qi;
15292 type = v8hi_ftype_v8hi_v8hi;
15295 type = v4si_ftype_v4si_v4si;
15298 type = v2di_ftype_v2di_v2di;
15301 type = v2df_ftype_v2df_v2df;
15304 type = v4sf_ftype_v4sf_v4sf;
15307 type = v8qi_ftype_v8qi_v8qi;
15310 type = v4hi_ftype_v4hi_v4hi;
15313 type = v2si_ftype_v2si_v2si;
15316 type = di_ftype_di_di;
15320 gcc_unreachable ();
15323 /* Override for comparisons. */
15324 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15325 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15326 type = v4si_ftype_v4sf_v4sf;
15328 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15329 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15330 type = v2di_ftype_v2df_v2df;
15332 def_builtin (d->mask, d->name, type, d->code);
15335 /* Add all builtins that are more or less simple operations on 1 operand. */
15336 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15338 enum machine_mode mode;
15343 mode = insn_data[d->icode].operand[1].mode;
15348 type = v16qi_ftype_v16qi;
15351 type = v8hi_ftype_v8hi;
15354 type = v4si_ftype_v4si;
15357 type = v2df_ftype_v2df;
15360 type = v4sf_ftype_v4sf;
15363 type = v8qi_ftype_v8qi;
15366 type = v4hi_ftype_v4hi;
15369 type = v2si_ftype_v2si;
15376 def_builtin (d->mask, d->name, type, d->code);
15379 /* Add the remaining MMX insns with somewhat more complicated types. */
15380 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15381 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15382 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15383 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15385 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15386 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15387 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15389 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15390 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15392 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15393 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15395 /* comi/ucomi insns. */
15396 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15397 if (d->mask == MASK_SSE2)
15398 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15400 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15402 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15403 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15404 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15406 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15407 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15408 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15409 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15410 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15411 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15412 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15413 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15414 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15415 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15416 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15418 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15420 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15421 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15423 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15424 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15425 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15426 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15428 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15429 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15430 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15431 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15433 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15435 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15437 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15438 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15439 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15440 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15441 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15442 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15444 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15446 /* Original 3DNow! */
15447 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15448 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15449 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15450 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15451 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15452 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15453 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15454 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15455 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15456 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15457 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15458 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15459 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15460 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15461 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15462 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15463 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15464 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15465 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15466 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15468 /* 3DNow! extension as used in the Athlon CPU. */
15469 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15470 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15471 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15472 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15473 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15474 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15477 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15479 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15480 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15482 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15483 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15485 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15486 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15487 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15488 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15489 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15491 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15492 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15493 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15494 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15496 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15497 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15499 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15501 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15502 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15504 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15505 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15506 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15507 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15508 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15510 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15512 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15513 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15514 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15515 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15517 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15518 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15519 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15521 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15522 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15523 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15524 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15526 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15527 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15528 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15530 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15531 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15533 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15534 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15536 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15537 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15538 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15540 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15541 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15542 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15544 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15545 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15547 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15548 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15549 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15550 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15552 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15553 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15554 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15555 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15557 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15558 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15560 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15562 /* Prescott New Instructions. */
15563 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15564 void_ftype_pcvoid_unsigned_unsigned,
15565 IX86_BUILTIN_MONITOR);
15566 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15567 void_ftype_unsigned_unsigned,
15568 IX86_BUILTIN_MWAIT);
15569 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15571 IX86_BUILTIN_MOVSHDUP);
15572 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15574 IX86_BUILTIN_MOVSLDUP);
15575 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15576 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15579 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
15580 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
15581 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
15582 IX86_BUILTIN_PALIGNR);
15584 /* Access to the vec_init patterns. */
15585 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15586 integer_type_node, NULL_TREE);
15587 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15588 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15590 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15591 short_integer_type_node,
15592 short_integer_type_node,
15593 short_integer_type_node, NULL_TREE);
15594 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15595 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15597 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15598 char_type_node, char_type_node,
15599 char_type_node, char_type_node,
15600 char_type_node, char_type_node,
15601 char_type_node, NULL_TREE);
15602 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15603 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15605 /* Access to the vec_extract patterns. */
15606 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15607 integer_type_node, NULL_TREE);
15608 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15609 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15611 ftype = build_function_type_list (long_long_integer_type_node,
15612 V2DI_type_node, integer_type_node,
15614 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15615 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15617 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15618 integer_type_node, NULL_TREE);
15619 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15620 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15622 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15623 integer_type_node, NULL_TREE);
15624 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15625 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15627 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15628 integer_type_node, NULL_TREE);
15629 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15630 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15632 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15633 integer_type_node, NULL_TREE);
15634 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15635 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15637 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15638 integer_type_node, NULL_TREE);
15639 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15640 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15642 /* Access to the vec_set patterns. */
15643 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15645 integer_type_node, NULL_TREE);
15646 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15647 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15649 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15651 integer_type_node, NULL_TREE);
15652 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15653 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15656 /* Errors in the source file can cause expand_expr to return const0_rtx
15657 where we expect a vector. To avoid crashing, use one of the vector
15658 clear instructions. */
15660 safe_vector_operand (rtx x, enum machine_mode mode)
15662 if (x == const0_rtx)
15663 x = CONST0_RTX (mode);
15667 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15670 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15673 tree arg0 = TREE_VALUE (arglist);
15674 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15675 rtx op0 = expand_normal (arg0);
15676 rtx op1 = expand_normal (arg1);
15677 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15678 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15679 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15681 if (VECTOR_MODE_P (mode0))
15682 op0 = safe_vector_operand (op0, mode0);
15683 if (VECTOR_MODE_P (mode1))
15684 op1 = safe_vector_operand (op1, mode1);
15686 if (optimize || !target
15687 || GET_MODE (target) != tmode
15688 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15689 target = gen_reg_rtx (tmode);
15691 if (GET_MODE (op1) == SImode && mode1 == TImode)
15693 rtx x = gen_reg_rtx (V4SImode);
15694 emit_insn (gen_sse2_loadd (x, op1));
15695 op1 = gen_lowpart (TImode, x);
15698 /* The insn must want input operands in the same modes as the
15700 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15701 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15703 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15704 op0 = copy_to_mode_reg (mode0, op0);
15705 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15706 op1 = copy_to_mode_reg (mode1, op1);
15708 /* ??? Using ix86_fixup_binary_operands is problematic when
15709 we've got mismatched modes. Fake it. */
15715 if (tmode == mode0 && tmode == mode1)
15717 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15721 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15723 op0 = force_reg (mode0, op0);
15724 op1 = force_reg (mode1, op1);
15725 target = gen_reg_rtx (tmode);
15728 pat = GEN_FCN (icode) (target, op0, op1);
15735 /* Subroutine of ix86_expand_builtin to take care of stores. */
15738 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15741 tree arg0 = TREE_VALUE (arglist);
15742 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15743 rtx op0 = expand_normal (arg0);
15744 rtx op1 = expand_normal (arg1);
15745 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15746 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15748 if (VECTOR_MODE_P (mode1))
15749 op1 = safe_vector_operand (op1, mode1);
15751 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15752 op1 = copy_to_mode_reg (mode1, op1);
15754 pat = GEN_FCN (icode) (op0, op1);
15760 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15763 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15764 rtx target, int do_load)
15767 tree arg0 = TREE_VALUE (arglist);
15768 rtx op0 = expand_normal (arg0);
15769 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15770 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15772 if (optimize || !target
15773 || GET_MODE (target) != tmode
15774 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15775 target = gen_reg_rtx (tmode);
15777 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15780 if (VECTOR_MODE_P (mode0))
15781 op0 = safe_vector_operand (op0, mode0);
15783 if ((optimize && !register_operand (op0, mode0))
15784 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15785 op0 = copy_to_mode_reg (mode0, op0);
15788 pat = GEN_FCN (icode) (target, op0);
15795 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15796 sqrtss, rsqrtss, rcpss. */
15799 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15802 tree arg0 = TREE_VALUE (arglist);
15803 rtx op1, op0 = expand_normal (arg0);
15804 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15805 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15807 if (optimize || !target
15808 || GET_MODE (target) != tmode
15809 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15810 target = gen_reg_rtx (tmode);
15812 if (VECTOR_MODE_P (mode0))
15813 op0 = safe_vector_operand (op0, mode0);
15815 if ((optimize && !register_operand (op0, mode0))
15816 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15817 op0 = copy_to_mode_reg (mode0, op0);
15820 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15821 op1 = copy_to_mode_reg (mode0, op1);
15823 pat = GEN_FCN (icode) (target, op0, op1);
15830 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15833 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15837 tree arg0 = TREE_VALUE (arglist);
15838 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15839 rtx op0 = expand_normal (arg0);
15840 rtx op1 = expand_normal (arg1);
15842 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15843 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15844 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15845 enum rtx_code comparison = d->comparison;
15847 if (VECTOR_MODE_P (mode0))
15848 op0 = safe_vector_operand (op0, mode0);
15849 if (VECTOR_MODE_P (mode1))
15850 op1 = safe_vector_operand (op1, mode1);
15852 /* Swap operands if we have a comparison that isn't available in
15854 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15856 rtx tmp = gen_reg_rtx (mode1);
15857 emit_move_insn (tmp, op1);
15862 if (optimize || !target
15863 || GET_MODE (target) != tmode
15864 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15865 target = gen_reg_rtx (tmode);
15867 if ((optimize && !register_operand (op0, mode0))
15868 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15869 op0 = copy_to_mode_reg (mode0, op0);
15870 if ((optimize && !register_operand (op1, mode1))
15871 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15872 op1 = copy_to_mode_reg (mode1, op1);
15874 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15875 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15882 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15885 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15889 tree arg0 = TREE_VALUE (arglist);
15890 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15891 rtx op0 = expand_normal (arg0);
15892 rtx op1 = expand_normal (arg1);
15894 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15895 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15896 enum rtx_code comparison = d->comparison;
15898 if (VECTOR_MODE_P (mode0))
15899 op0 = safe_vector_operand (op0, mode0);
15900 if (VECTOR_MODE_P (mode1))
15901 op1 = safe_vector_operand (op1, mode1);
15903 /* Swap operands if we have a comparison that isn't available in
15905 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15912 target = gen_reg_rtx (SImode);
15913 emit_move_insn (target, const0_rtx);
15914 target = gen_rtx_SUBREG (QImode, target, 0);
15916 if ((optimize && !register_operand (op0, mode0))
15917 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15918 op0 = copy_to_mode_reg (mode0, op0);
15919 if ((optimize && !register_operand (op1, mode1))
15920 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15921 op1 = copy_to_mode_reg (mode1, op1);
15923 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15924 pat = GEN_FCN (d->icode) (op0, op1);
15928 emit_insn (gen_rtx_SET (VOIDmode,
15929 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15930 gen_rtx_fmt_ee (comparison, QImode,
15934 return SUBREG_REG (target);
15937 /* Return the integer constant in ARG. Constrain it to be in the range
15938 of the subparts of VEC_TYPE; issue an error if not. */
15941 get_element_number (tree vec_type, tree arg)
15943 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15945 if (!host_integerp (arg, 1)
15946 || (elt = tree_low_cst (arg, 1), elt > max))
15948 error ("selector must be an integer constant in the range 0..%wi", max);
15955 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15956 ix86_expand_vector_init. We DO have language-level syntax for this, in
15957 the form of (type){ init-list }. Except that since we can't place emms
15958 instructions from inside the compiler, we can't allow the use of MMX
15959 registers unless the user explicitly asks for it. So we do *not* define
15960 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15961 we have builtins invoked by mmintrin.h that gives us license to emit
15962 these sorts of instructions. */
15965 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15967 enum machine_mode tmode = TYPE_MODE (type);
15968 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15969 int i, n_elt = GET_MODE_NUNITS (tmode);
15970 rtvec v = rtvec_alloc (n_elt);
15972 gcc_assert (VECTOR_MODE_P (tmode));
15974 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15976 rtx x = expand_normal (TREE_VALUE (arglist));
15977 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15980 gcc_assert (arglist == NULL);
15982 if (!target || !register_operand (target, tmode))
15983 target = gen_reg_rtx (tmode);
15985 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15989 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15990 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15991 had a language-level syntax for referencing vector elements. */
15994 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15996 enum machine_mode tmode, mode0;
16001 arg0 = TREE_VALUE (arglist);
16002 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16004 op0 = expand_normal (arg0);
16005 elt = get_element_number (TREE_TYPE (arg0), arg1);
16007 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16008 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16009 gcc_assert (VECTOR_MODE_P (mode0));
16011 op0 = force_reg (mode0, op0);
16013 if (optimize || !target || !register_operand (target, tmode))
16014 target = gen_reg_rtx (tmode);
16016 ix86_expand_vector_extract (true, target, op0, elt);
16021 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16022 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16023 a language-level syntax for referencing vector elements. */
16026 ix86_expand_vec_set_builtin (tree arglist)
16028 enum machine_mode tmode, mode1;
16029 tree arg0, arg1, arg2;
16033 arg0 = TREE_VALUE (arglist);
16034 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16035 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16037 tmode = TYPE_MODE (TREE_TYPE (arg0));
16038 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16039 gcc_assert (VECTOR_MODE_P (tmode));
16041 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16042 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16043 elt = get_element_number (TREE_TYPE (arg0), arg2);
16045 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16046 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16048 op0 = force_reg (tmode, op0);
16049 op1 = force_reg (mode1, op1);
16051 ix86_expand_vector_set (true, op0, op1, elt);
16056 /* Expand an expression EXP that calls a built-in function,
16057 with result going to TARGET if that's convenient
16058 (and in mode MODE if that's convenient).
16059 SUBTARGET may be used as the target for computing one of EXP's operands.
16060 IGNORE is nonzero if the value is to be ignored. */
16063 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16064 enum machine_mode mode ATTRIBUTE_UNUSED,
16065 int ignore ATTRIBUTE_UNUSED)
16067 const struct builtin_description *d;
16069 enum insn_code icode;
16070 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16071 tree arglist = TREE_OPERAND (exp, 1);
16072 tree arg0, arg1, arg2;
16073 rtx op0, op1, op2, pat;
16074 enum machine_mode tmode, mode0, mode1, mode2, mode3;
16075 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16079 case IX86_BUILTIN_EMMS:
16080 emit_insn (gen_mmx_emms ());
16083 case IX86_BUILTIN_SFENCE:
16084 emit_insn (gen_sse_sfence ());
16087 case IX86_BUILTIN_MASKMOVQ:
16088 case IX86_BUILTIN_MASKMOVDQU:
16089 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16090 ? CODE_FOR_mmx_maskmovq
16091 : CODE_FOR_sse2_maskmovdqu);
16092 /* Note the arg order is different from the operand order. */
16093 arg1 = TREE_VALUE (arglist);
16094 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16095 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16096 op0 = expand_normal (arg0);
16097 op1 = expand_normal (arg1);
16098 op2 = expand_normal (arg2);
16099 mode0 = insn_data[icode].operand[0].mode;
16100 mode1 = insn_data[icode].operand[1].mode;
16101 mode2 = insn_data[icode].operand[2].mode;
16103 op0 = force_reg (Pmode, op0);
16104 op0 = gen_rtx_MEM (mode1, op0);
16106 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16107 op0 = copy_to_mode_reg (mode0, op0);
16108 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16109 op1 = copy_to_mode_reg (mode1, op1);
16110 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16111 op2 = copy_to_mode_reg (mode2, op2);
16112 pat = GEN_FCN (icode) (op0, op1, op2);
16118 case IX86_BUILTIN_SQRTSS:
16119 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16120 case IX86_BUILTIN_RSQRTSS:
16121 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16122 case IX86_BUILTIN_RCPSS:
16123 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16125 case IX86_BUILTIN_LOADUPS:
16126 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16128 case IX86_BUILTIN_STOREUPS:
16129 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16131 case IX86_BUILTIN_LOADHPS:
16132 case IX86_BUILTIN_LOADLPS:
16133 case IX86_BUILTIN_LOADHPD:
16134 case IX86_BUILTIN_LOADLPD:
16135 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16136 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16137 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16138 : CODE_FOR_sse2_loadlpd);
16139 arg0 = TREE_VALUE (arglist);
16140 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16141 op0 = expand_normal (arg0);
16142 op1 = expand_normal (arg1);
16143 tmode = insn_data[icode].operand[0].mode;
16144 mode0 = insn_data[icode].operand[1].mode;
16145 mode1 = insn_data[icode].operand[2].mode;
16147 op0 = force_reg (mode0, op0);
16148 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16149 if (optimize || target == 0
16150 || GET_MODE (target) != tmode
16151 || !register_operand (target, tmode))
16152 target = gen_reg_rtx (tmode);
16153 pat = GEN_FCN (icode) (target, op0, op1);
16159 case IX86_BUILTIN_STOREHPS:
16160 case IX86_BUILTIN_STORELPS:
16161 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16162 : CODE_FOR_sse_storelps);
16163 arg0 = TREE_VALUE (arglist);
16164 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16165 op0 = expand_normal (arg0);
16166 op1 = expand_normal (arg1);
16167 mode0 = insn_data[icode].operand[0].mode;
16168 mode1 = insn_data[icode].operand[1].mode;
16170 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16171 op1 = force_reg (mode1, op1);
16173 pat = GEN_FCN (icode) (op0, op1);
16179 case IX86_BUILTIN_MOVNTPS:
16180 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16181 case IX86_BUILTIN_MOVNTQ:
16182 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16184 case IX86_BUILTIN_LDMXCSR:
16185 op0 = expand_normal (TREE_VALUE (arglist));
16186 target = assign_386_stack_local (SImode, SLOT_TEMP);
16187 emit_move_insn (target, op0);
16188 emit_insn (gen_sse_ldmxcsr (target));
16191 case IX86_BUILTIN_STMXCSR:
16192 target = assign_386_stack_local (SImode, SLOT_TEMP);
16193 emit_insn (gen_sse_stmxcsr (target));
16194 return copy_to_mode_reg (SImode, target);
16196 case IX86_BUILTIN_SHUFPS:
16197 case IX86_BUILTIN_SHUFPD:
16198 icode = (fcode == IX86_BUILTIN_SHUFPS
16199 ? CODE_FOR_sse_shufps
16200 : CODE_FOR_sse2_shufpd);
16201 arg0 = TREE_VALUE (arglist);
16202 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16203 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16204 op0 = expand_normal (arg0);
16205 op1 = expand_normal (arg1);
16206 op2 = expand_normal (arg2);
16207 tmode = insn_data[icode].operand[0].mode;
16208 mode0 = insn_data[icode].operand[1].mode;
16209 mode1 = insn_data[icode].operand[2].mode;
16210 mode2 = insn_data[icode].operand[3].mode;
16212 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16213 op0 = copy_to_mode_reg (mode0, op0);
16214 if ((optimize && !register_operand (op1, mode1))
16215 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16216 op1 = copy_to_mode_reg (mode1, op1);
16217 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16219 /* @@@ better error message */
16220 error ("mask must be an immediate");
16221 return gen_reg_rtx (tmode);
16223 if (optimize || target == 0
16224 || GET_MODE (target) != tmode
16225 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16226 target = gen_reg_rtx (tmode);
16227 pat = GEN_FCN (icode) (target, op0, op1, op2);
16233 case IX86_BUILTIN_PSHUFW:
16234 case IX86_BUILTIN_PSHUFD:
16235 case IX86_BUILTIN_PSHUFHW:
16236 case IX86_BUILTIN_PSHUFLW:
16237 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16238 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16239 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16240 : CODE_FOR_mmx_pshufw);
16241 arg0 = TREE_VALUE (arglist);
16242 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16243 op0 = expand_normal (arg0);
16244 op1 = expand_normal (arg1);
16245 tmode = insn_data[icode].operand[0].mode;
16246 mode1 = insn_data[icode].operand[1].mode;
16247 mode2 = insn_data[icode].operand[2].mode;
16249 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16250 op0 = copy_to_mode_reg (mode1, op0);
16251 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16253 /* @@@ better error message */
16254 error ("mask must be an immediate");
16258 || GET_MODE (target) != tmode
16259 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16260 target = gen_reg_rtx (tmode);
16261 pat = GEN_FCN (icode) (target, op0, op1);
16267 case IX86_BUILTIN_PSLLDQI128:
16268 case IX86_BUILTIN_PSRLDQI128:
16269 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16270 : CODE_FOR_sse2_lshrti3);
16271 arg0 = TREE_VALUE (arglist);
16272 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16273 op0 = expand_normal (arg0);
16274 op1 = expand_normal (arg1);
16275 tmode = insn_data[icode].operand[0].mode;
16276 mode1 = insn_data[icode].operand[1].mode;
16277 mode2 = insn_data[icode].operand[2].mode;
16279 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16281 op0 = copy_to_reg (op0);
16282 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16284 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16286 error ("shift must be an immediate");
16289 target = gen_reg_rtx (V2DImode);
16290 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16296 case IX86_BUILTIN_FEMMS:
16297 emit_insn (gen_mmx_femms ());
16300 case IX86_BUILTIN_PAVGUSB:
16301 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16303 case IX86_BUILTIN_PF2ID:
16304 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16306 case IX86_BUILTIN_PFACC:
16307 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16309 case IX86_BUILTIN_PFADD:
16310 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16312 case IX86_BUILTIN_PFCMPEQ:
16313 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16315 case IX86_BUILTIN_PFCMPGE:
16316 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16318 case IX86_BUILTIN_PFCMPGT:
16319 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16321 case IX86_BUILTIN_PFMAX:
16322 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16324 case IX86_BUILTIN_PFMIN:
16325 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16327 case IX86_BUILTIN_PFMUL:
16328 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16330 case IX86_BUILTIN_PFRCP:
16331 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16333 case IX86_BUILTIN_PFRCPIT1:
16334 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16336 case IX86_BUILTIN_PFRCPIT2:
16337 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16339 case IX86_BUILTIN_PFRSQIT1:
16340 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16342 case IX86_BUILTIN_PFRSQRT:
16343 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16345 case IX86_BUILTIN_PFSUB:
16346 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16348 case IX86_BUILTIN_PFSUBR:
16349 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16351 case IX86_BUILTIN_PI2FD:
16352 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16354 case IX86_BUILTIN_PMULHRW:
16355 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16357 case IX86_BUILTIN_PF2IW:
16358 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16360 case IX86_BUILTIN_PFNACC:
16361 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16363 case IX86_BUILTIN_PFPNACC:
16364 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16366 case IX86_BUILTIN_PI2FW:
16367 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16369 case IX86_BUILTIN_PSWAPDSI:
16370 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16372 case IX86_BUILTIN_PSWAPDSF:
16373 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16375 case IX86_BUILTIN_SQRTSD:
16376 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16377 case IX86_BUILTIN_LOADUPD:
16378 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16379 case IX86_BUILTIN_STOREUPD:
16380 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16382 case IX86_BUILTIN_MFENCE:
16383 emit_insn (gen_sse2_mfence ());
16385 case IX86_BUILTIN_LFENCE:
16386 emit_insn (gen_sse2_lfence ());
16389 case IX86_BUILTIN_CLFLUSH:
16390 arg0 = TREE_VALUE (arglist);
16391 op0 = expand_normal (arg0);
16392 icode = CODE_FOR_sse2_clflush;
16393 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16394 op0 = copy_to_mode_reg (Pmode, op0);
16396 emit_insn (gen_sse2_clflush (op0));
16399 case IX86_BUILTIN_MOVNTPD:
16400 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16401 case IX86_BUILTIN_MOVNTDQ:
16402 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16403 case IX86_BUILTIN_MOVNTI:
16404 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16406 case IX86_BUILTIN_LOADDQU:
16407 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16408 case IX86_BUILTIN_STOREDQU:
16409 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16411 case IX86_BUILTIN_MONITOR:
16412 arg0 = TREE_VALUE (arglist);
16413 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16414 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16415 op0 = expand_normal (arg0);
16416 op1 = expand_normal (arg1);
16417 op2 = expand_normal (arg2);
16419 op0 = copy_to_mode_reg (Pmode, op0);
16421 op1 = copy_to_mode_reg (SImode, op1);
16423 op2 = copy_to_mode_reg (SImode, op2);
16425 emit_insn (gen_sse3_monitor (op0, op1, op2));
16427 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16430 case IX86_BUILTIN_MWAIT:
16431 arg0 = TREE_VALUE (arglist);
16432 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16433 op0 = expand_normal (arg0);
16434 op1 = expand_normal (arg1);
16436 op0 = copy_to_mode_reg (SImode, op0);
16438 op1 = copy_to_mode_reg (SImode, op1);
16439 emit_insn (gen_sse3_mwait (op0, op1));
16442 case IX86_BUILTIN_LDDQU:
16443 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16446 case IX86_BUILTIN_PALIGNR:
16447 case IX86_BUILTIN_PALIGNR128:
16448 if (fcode == IX86_BUILTIN_PALIGNR)
16450 icode = CODE_FOR_ssse3_palignrdi;
16455 icode = CODE_FOR_ssse3_palignrti;
16458 arg0 = TREE_VALUE (arglist);
16459 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16460 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16461 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16462 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16463 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
16464 tmode = insn_data[icode].operand[0].mode;
16465 mode1 = insn_data[icode].operand[1].mode;
16466 mode2 = insn_data[icode].operand[2].mode;
16467 mode3 = insn_data[icode].operand[3].mode;
16469 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16471 op0 = copy_to_reg (op0);
16472 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16474 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16476 op1 = copy_to_reg (op1);
16477 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
16479 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
16481 error ("shift must be an immediate");
16484 target = gen_reg_rtx (mode);
16485 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
16492 case IX86_BUILTIN_VEC_INIT_V2SI:
16493 case IX86_BUILTIN_VEC_INIT_V4HI:
16494 case IX86_BUILTIN_VEC_INIT_V8QI:
16495 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16497 case IX86_BUILTIN_VEC_EXT_V2DF:
16498 case IX86_BUILTIN_VEC_EXT_V2DI:
16499 case IX86_BUILTIN_VEC_EXT_V4SF:
16500 case IX86_BUILTIN_VEC_EXT_V4SI:
16501 case IX86_BUILTIN_VEC_EXT_V8HI:
16502 case IX86_BUILTIN_VEC_EXT_V2SI:
16503 case IX86_BUILTIN_VEC_EXT_V4HI:
16504 return ix86_expand_vec_ext_builtin (arglist, target);
16506 case IX86_BUILTIN_VEC_SET_V8HI:
16507 case IX86_BUILTIN_VEC_SET_V4HI:
16508 return ix86_expand_vec_set_builtin (arglist);
16514 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16515 if (d->code == fcode)
16517 /* Compares are treated specially. */
16518 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16519 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16520 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16521 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16522 return ix86_expand_sse_compare (d, arglist, target);
16524 return ix86_expand_binop_builtin (d->icode, arglist, target);
16527 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16528 if (d->code == fcode)
16529 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16531 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16532 if (d->code == fcode)
16533 return ix86_expand_sse_comi (d, arglist, target);
16535 gcc_unreachable ();
16538 /* Store OPERAND to the memory after reload is completed. This means
16539 that we can't easily use assign_stack_local. */
16541 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16545 gcc_assert (reload_completed);
16546 if (TARGET_RED_ZONE)
16548 result = gen_rtx_MEM (mode,
16549 gen_rtx_PLUS (Pmode,
16551 GEN_INT (-RED_ZONE_SIZE)));
16552 emit_move_insn (result, operand);
16554 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16560 operand = gen_lowpart (DImode, operand);
16564 gen_rtx_SET (VOIDmode,
16565 gen_rtx_MEM (DImode,
16566 gen_rtx_PRE_DEC (DImode,
16567 stack_pointer_rtx)),
16571 gcc_unreachable ();
16573 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16582 split_di (&operand, 1, operands, operands + 1);
16584 gen_rtx_SET (VOIDmode,
16585 gen_rtx_MEM (SImode,
16586 gen_rtx_PRE_DEC (Pmode,
16587 stack_pointer_rtx)),
16590 gen_rtx_SET (VOIDmode,
16591 gen_rtx_MEM (SImode,
16592 gen_rtx_PRE_DEC (Pmode,
16593 stack_pointer_rtx)),
16598 /* Store HImodes as SImodes. */
16599 operand = gen_lowpart (SImode, operand);
16603 gen_rtx_SET (VOIDmode,
16604 gen_rtx_MEM (GET_MODE (operand),
16605 gen_rtx_PRE_DEC (SImode,
16606 stack_pointer_rtx)),
16610 gcc_unreachable ();
16612 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16617 /* Free operand from the memory. */
16619 ix86_free_from_memory (enum machine_mode mode)
16621 if (!TARGET_RED_ZONE)
16625 if (mode == DImode || TARGET_64BIT)
16629 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16630 to pop or add instruction if registers are available. */
16631 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16632 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16637 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16638 QImode must go into class Q_REGS.
16639 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16640 movdf to do mem-to-mem moves through integer regs. */
16642 ix86_preferred_reload_class (rtx x, enum reg_class class)
16644 enum machine_mode mode = GET_MODE (x);
16646 /* We're only allowed to return a subclass of CLASS. Many of the
16647 following checks fail for NO_REGS, so eliminate that early. */
16648 if (class == NO_REGS)
16651 /* All classes can load zeros. */
16652 if (x == CONST0_RTX (mode))
16655 /* Force constants into memory if we are loading a (nonzero) constant into
16656 an MMX or SSE register. This is because there are no MMX/SSE instructions
16657 to load from a constant. */
16659 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16662 /* Prefer SSE regs only, if we can use them for math. */
16663 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16664 return SSE_CLASS_P (class) ? class : NO_REGS;
16666 /* Floating-point constants need more complex checks. */
16667 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16669 /* General regs can load everything. */
16670 if (reg_class_subset_p (class, GENERAL_REGS))
16673 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16674 zero above. We only want to wind up preferring 80387 registers if
16675 we plan on doing computation with them. */
16677 && standard_80387_constant_p (x))
16679 /* Limit class to non-sse. */
16680 if (class == FLOAT_SSE_REGS)
16682 if (class == FP_TOP_SSE_REGS)
16684 if (class == FP_SECOND_SSE_REGS)
16685 return FP_SECOND_REG;
16686 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16693 /* Generally when we see PLUS here, it's the function invariant
16694 (plus soft-fp const_int). Which can only be computed into general
16696 if (GET_CODE (x) == PLUS)
16697 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16699 /* QImode constants are easy to load, but non-constant QImode data
16700 must go into Q_REGS. */
16701 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16703 if (reg_class_subset_p (class, Q_REGS))
16705 if (reg_class_subset_p (Q_REGS, class))
16713 /* Discourage putting floating-point values in SSE registers unless
16714 SSE math is being used, and likewise for the 387 registers. */
16716 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16718 enum machine_mode mode = GET_MODE (x);
16720 /* Restrict the output reload class to the register bank that we are doing
16721 math on. If we would like not to return a subset of CLASS, reject this
16722 alternative: if reload cannot do this, it will still use its choice. */
16723 mode = GET_MODE (x);
16724 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16725 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16727 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16729 if (class == FP_TOP_SSE_REGS)
16731 else if (class == FP_SECOND_SSE_REGS)
16732 return FP_SECOND_REG;
16734 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16740 /* If we are copying between general and FP registers, we need a memory
16741 location. The same is true for SSE and MMX registers.
16743 The macro can't work reliably when one of the CLASSES is class containing
16744 registers from multiple units (SSE, MMX, integer). We avoid this by never
16745 combining those units in single alternative in the machine description.
16746 Ensure that this constraint holds to avoid unexpected surprises.
16748 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16749 enforce these sanity checks. */
16752 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16753 enum machine_mode mode, int strict)
16755 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16756 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16757 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16758 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16759 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16760 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16762 gcc_assert (!strict);
16766 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16769 /* ??? This is a lie. We do have moves between mmx/general, and for
16770 mmx/sse2. But by saying we need secondary memory we discourage the
16771 register allocator from using the mmx registers unless needed. */
16772 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16775 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16777 /* SSE1 doesn't have any direct moves from other classes. */
16781 /* If the target says that inter-unit moves are more expensive
16782 than moving through memory, then don't generate them. */
16783 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16786 /* Between SSE and general, we have moves no larger than word size. */
16787 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16790 /* ??? For the cost of one register reformat penalty, we could use
16791 the same instructions to move SFmode and DFmode data, but the
16792 relevant move patterns don't support those alternatives. */
16793 if (mode == SFmode || mode == DFmode)
16800 /* Return true if the registers in CLASS cannot represent the change from
16801 modes FROM to TO. */
16804 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16805 enum reg_class class)
16810 /* x87 registers can't do subreg at all, as all values are reformatted
16811 to extended precision. */
16812 if (MAYBE_FLOAT_CLASS_P (class))
16815 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16817 /* Vector registers do not support QI or HImode loads. If we don't
16818 disallow a change to these modes, reload will assume it's ok to
16819 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16820 the vec_dupv4hi pattern. */
16821 if (GET_MODE_SIZE (from) < 4)
16824 /* Vector registers do not support subreg with nonzero offsets, which
16825 are otherwise valid for integer registers. Since we can't see
16826 whether we have a nonzero offset from here, prohibit all
16827 nonparadoxical subregs changing size. */
16828 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16835 /* Return the cost of moving data from a register in class CLASS1 to
16836 one in class CLASS2.
16838 It is not required that the cost always equal 2 when FROM is the same as TO;
16839 on some machines it is expensive to move between registers if they are not
16840 general registers. */
16843 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16844 enum reg_class class2)
16846 /* In case we require secondary memory, compute cost of the store followed
16847 by load. In order to avoid bad register allocation choices, we need
16848 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16850 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16854 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16855 MEMORY_MOVE_COST (mode, class1, 1));
16856 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16857 MEMORY_MOVE_COST (mode, class2, 1));
16859 /* In case of copying from general_purpose_register we may emit multiple
16860 stores followed by single load causing memory size mismatch stall.
16861 Count this as arbitrarily high cost of 20. */
16862 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16865 /* In the case of FP/MMX moves, the registers actually overlap, and we
16866 have to switch modes in order to treat them differently. */
16867 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16868 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16874 /* Moves between SSE/MMX and integer unit are expensive. */
16875 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16876 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16877 return ix86_cost->mmxsse_to_integer;
16878 if (MAYBE_FLOAT_CLASS_P (class1))
16879 return ix86_cost->fp_move;
16880 if (MAYBE_SSE_CLASS_P (class1))
16881 return ix86_cost->sse_move;
16882 if (MAYBE_MMX_CLASS_P (class1))
16883 return ix86_cost->mmx_move;
16887 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16890 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16892 /* Flags and only flags can only hold CCmode values. */
16893 if (CC_REGNO_P (regno))
16894 return GET_MODE_CLASS (mode) == MODE_CC;
16895 if (GET_MODE_CLASS (mode) == MODE_CC
16896 || GET_MODE_CLASS (mode) == MODE_RANDOM
16897 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16899 if (FP_REGNO_P (regno))
16900 return VALID_FP_MODE_P (mode);
16901 if (SSE_REGNO_P (regno))
16903 /* We implement the move patterns for all vector modes into and
16904 out of SSE registers, even when no operation instructions
16906 return (VALID_SSE_REG_MODE (mode)
16907 || VALID_SSE2_REG_MODE (mode)
16908 || VALID_MMX_REG_MODE (mode)
16909 || VALID_MMX_REG_MODE_3DNOW (mode));
16911 if (MMX_REGNO_P (regno))
16913 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16914 so if the register is available at all, then we can move data of
16915 the given mode into or out of it. */
16916 return (VALID_MMX_REG_MODE (mode)
16917 || VALID_MMX_REG_MODE_3DNOW (mode));
16920 if (mode == QImode)
16922 /* Take care for QImode values - they can be in non-QI regs,
16923 but then they do cause partial register stalls. */
16924 if (regno < 4 || TARGET_64BIT)
16926 if (!TARGET_PARTIAL_REG_STALL)
16928 return reload_in_progress || reload_completed;
16930 /* We handle both integer and floats in the general purpose registers. */
16931 else if (VALID_INT_MODE_P (mode))
16933 else if (VALID_FP_MODE_P (mode))
16935 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16936 on to use that value in smaller contexts, this can easily force a
16937 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16938 supporting DImode, allow it. */
16939 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16945 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16946 tieable integer mode. */
16949 ix86_tieable_integer_mode_p (enum machine_mode mode)
16958 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16961 return TARGET_64BIT;
16968 /* Return true if MODE1 is accessible in a register that can hold MODE2
16969 without copying. That is, all register classes that can hold MODE2
16970 can also hold MODE1. */
16973 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16975 if (mode1 == mode2)
16978 if (ix86_tieable_integer_mode_p (mode1)
16979 && ix86_tieable_integer_mode_p (mode2))
16982 /* MODE2 being XFmode implies fp stack or general regs, which means we
16983 can tie any smaller floating point modes to it. Note that we do not
16984 tie this with TFmode. */
16985 if (mode2 == XFmode)
16986 return mode1 == SFmode || mode1 == DFmode;
16988 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16989 that we can tie it with SFmode. */
16990 if (mode2 == DFmode)
16991 return mode1 == SFmode;
16993 /* If MODE2 is only appropriate for an SSE register, then tie with
16994 any other mode acceptable to SSE registers. */
16995 if (GET_MODE_SIZE (mode2) >= 8
16996 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16997 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16999 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17000 with any other mode acceptable to MMX registers. */
17001 if (GET_MODE_SIZE (mode2) == 8
17002 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17003 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17008 /* Return the cost of moving data of mode M between a
17009 register and memory. A value of 2 is the default; this cost is
17010 relative to those in `REGISTER_MOVE_COST'.
17012 If moving between registers and memory is more expensive than
17013 between two registers, you should define this macro to express the
17016 Model also increased moving costs of QImode registers in non
17020 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17022 if (FLOAT_CLASS_P (class))
17039 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17041 if (SSE_CLASS_P (class))
17044 switch (GET_MODE_SIZE (mode))
17058 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17060 if (MMX_CLASS_P (class))
17063 switch (GET_MODE_SIZE (mode))
17074 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17076 switch (GET_MODE_SIZE (mode))
17080 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17081 : ix86_cost->movzbl_load);
17083 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17084 : ix86_cost->int_store[0] + 4);
17087 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17089 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17090 if (mode == TFmode)
17092 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17093 * (((int) GET_MODE_SIZE (mode)
17094 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17098 /* Compute a (partial) cost for rtx X. Return true if the complete
17099 cost has been computed, and false if subexpressions should be
17100 scanned. In either case, *TOTAL contains the cost result. */
17103 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17105 enum machine_mode mode = GET_MODE (x);
17113 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17115 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17117 else if (flag_pic && SYMBOLIC_CONST (x)
17119 || (!GET_CODE (x) != LABEL_REF
17120 && (GET_CODE (x) != SYMBOL_REF
17121 || !SYMBOL_REF_LOCAL_P (x)))))
17128 if (mode == VOIDmode)
17131 switch (standard_80387_constant_p (x))
17136 default: /* Other constants */
17141 /* Start with (MEM (SYMBOL_REF)), since that's where
17142 it'll probably end up. Add a penalty for size. */
17143 *total = (COSTS_N_INSNS (1)
17144 + (flag_pic != 0 && !TARGET_64BIT)
17145 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17151 /* The zero extensions is often completely free on x86_64, so make
17152 it as cheap as possible. */
17153 if (TARGET_64BIT && mode == DImode
17154 && GET_MODE (XEXP (x, 0)) == SImode)
17156 else if (TARGET_ZERO_EXTEND_WITH_AND)
17157 *total = ix86_cost->add;
17159 *total = ix86_cost->movzx;
17163 *total = ix86_cost->movsx;
17167 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17168 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17170 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17173 *total = ix86_cost->add;
17176 if ((value == 2 || value == 3)
17177 && ix86_cost->lea <= ix86_cost->shift_const)
17179 *total = ix86_cost->lea;
17189 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17191 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17193 if (INTVAL (XEXP (x, 1)) > 32)
17194 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17196 *total = ix86_cost->shift_const * 2;
17200 if (GET_CODE (XEXP (x, 1)) == AND)
17201 *total = ix86_cost->shift_var * 2;
17203 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17208 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17209 *total = ix86_cost->shift_const;
17211 *total = ix86_cost->shift_var;
17216 if (FLOAT_MODE_P (mode))
17218 *total = ix86_cost->fmul;
17223 rtx op0 = XEXP (x, 0);
17224 rtx op1 = XEXP (x, 1);
17226 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17228 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17229 for (nbits = 0; value != 0; value &= value - 1)
17233 /* This is arbitrary. */
17236 /* Compute costs correctly for widening multiplication. */
17237 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
17238 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17239 == GET_MODE_SIZE (mode))
17241 int is_mulwiden = 0;
17242 enum machine_mode inner_mode = GET_MODE (op0);
17244 if (GET_CODE (op0) == GET_CODE (op1))
17245 is_mulwiden = 1, op1 = XEXP (op1, 0);
17246 else if (GET_CODE (op1) == CONST_INT)
17248 if (GET_CODE (op0) == SIGN_EXTEND)
17249 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17252 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17256 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17259 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17260 + nbits * ix86_cost->mult_bit
17261 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17270 if (FLOAT_MODE_P (mode))
17271 *total = ix86_cost->fdiv;
17273 *total = ix86_cost->divide[MODE_INDEX (mode)];
17277 if (FLOAT_MODE_P (mode))
17278 *total = ix86_cost->fadd;
17279 else if (GET_MODE_CLASS (mode) == MODE_INT
17280 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17282 if (GET_CODE (XEXP (x, 0)) == PLUS
17283 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17284 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17285 && CONSTANT_P (XEXP (x, 1)))
17287 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17288 if (val == 2 || val == 4 || val == 8)
17290 *total = ix86_cost->lea;
17291 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17292 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17294 *total += rtx_cost (XEXP (x, 1), outer_code);
17298 else if (GET_CODE (XEXP (x, 0)) == MULT
17299 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17301 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17302 if (val == 2 || val == 4 || val == 8)
17304 *total = ix86_cost->lea;
17305 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17306 *total += rtx_cost (XEXP (x, 1), outer_code);
17310 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17312 *total = ix86_cost->lea;
17313 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17314 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17315 *total += rtx_cost (XEXP (x, 1), outer_code);
17322 if (FLOAT_MODE_P (mode))
17324 *total = ix86_cost->fadd;
17332 if (!TARGET_64BIT && mode == DImode)
17334 *total = (ix86_cost->add * 2
17335 + (rtx_cost (XEXP (x, 0), outer_code)
17336 << (GET_MODE (XEXP (x, 0)) != DImode))
17337 + (rtx_cost (XEXP (x, 1), outer_code)
17338 << (GET_MODE (XEXP (x, 1)) != DImode)));
17344 if (FLOAT_MODE_P (mode))
17346 *total = ix86_cost->fchs;
17352 if (!TARGET_64BIT && mode == DImode)
17353 *total = ix86_cost->add * 2;
17355 *total = ix86_cost->add;
17359 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17360 && XEXP (XEXP (x, 0), 1) == const1_rtx
17361 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17362 && XEXP (x, 1) == const0_rtx)
17364 /* This kind of construct is implemented using test[bwl].
17365 Treat it as if we had an AND. */
17366 *total = (ix86_cost->add
17367 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17368 + rtx_cost (const1_rtx, outer_code));
17374 if (!TARGET_SSE_MATH
17376 || (mode == DFmode && !TARGET_SSE2))
17377 /* For standard 80387 constants, raise the cost to prevent
17378 compress_float_constant() to generate load from memory. */
17379 switch (standard_80387_constant_p (XEXP (x, 0)))
17389 *total = (x86_ext_80387_constants & TUNEMASK
17396 if (FLOAT_MODE_P (mode))
17397 *total = ix86_cost->fabs;
17401 if (FLOAT_MODE_P (mode))
17402 *total = ix86_cost->fsqrt;
17406 if (XINT (x, 1) == UNSPEC_TP)
17417 static int current_machopic_label_num;
17419 /* Given a symbol name and its associated stub, write out the
17420 definition of the stub. */
17423 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17425 unsigned int length;
17426 char *binder_name, *symbol_name, lazy_ptr_name[32];
17427 int label = ++current_machopic_label_num;
17429 /* For 64-bit we shouldn't get here. */
17430 gcc_assert (!TARGET_64BIT);
17432 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17433 symb = (*targetm.strip_name_encoding) (symb);
17435 length = strlen (stub);
17436 binder_name = alloca (length + 32);
17437 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17439 length = strlen (symb);
17440 symbol_name = alloca (length + 32);
17441 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17443 sprintf (lazy_ptr_name, "L%d$lz", label);
17446 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17448 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17450 fprintf (file, "%s:\n", stub);
17451 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17455 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17456 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17457 fprintf (file, "\tjmp\t*%%edx\n");
17460 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17462 fprintf (file, "%s:\n", binder_name);
17466 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17467 fprintf (file, "\tpushl\t%%eax\n");
17470 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17472 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17474 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17475 fprintf (file, "%s:\n", lazy_ptr_name);
17476 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17477 fprintf (file, "\t.long %s\n", binder_name);
17481 darwin_x86_file_end (void)
17483 darwin_file_end ();
17486 #endif /* TARGET_MACHO */
17488 /* Order the registers for register allocator. */
17491 x86_order_regs_for_local_alloc (void)
17496 /* First allocate the local general purpose registers. */
17497 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17498 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17499 reg_alloc_order [pos++] = i;
17501 /* Global general purpose registers. */
17502 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17503 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17504 reg_alloc_order [pos++] = i;
17506 /* x87 registers come first in case we are doing FP math
17508 if (!TARGET_SSE_MATH)
17509 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17510 reg_alloc_order [pos++] = i;
17512 /* SSE registers. */
17513 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17514 reg_alloc_order [pos++] = i;
17515 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17516 reg_alloc_order [pos++] = i;
17518 /* x87 registers. */
17519 if (TARGET_SSE_MATH)
17520 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17521 reg_alloc_order [pos++] = i;
17523 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17524 reg_alloc_order [pos++] = i;
17526 /* Initialize the rest of array as we do not allocate some registers
17528 while (pos < FIRST_PSEUDO_REGISTER)
17529 reg_alloc_order [pos++] = 0;
17532 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17533 struct attribute_spec.handler. */
17535 ix86_handle_struct_attribute (tree *node, tree name,
17536 tree args ATTRIBUTE_UNUSED,
17537 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17540 if (DECL_P (*node))
17542 if (TREE_CODE (*node) == TYPE_DECL)
17543 type = &TREE_TYPE (*node);
17548 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17549 || TREE_CODE (*type) == UNION_TYPE)))
17551 warning (OPT_Wattributes, "%qs attribute ignored",
17552 IDENTIFIER_POINTER (name));
17553 *no_add_attrs = true;
17556 else if ((is_attribute_p ("ms_struct", name)
17557 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17558 || ((is_attribute_p ("gcc_struct", name)
17559 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17561 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17562 IDENTIFIER_POINTER (name));
17563 *no_add_attrs = true;
17570 ix86_ms_bitfield_layout_p (tree record_type)
17572 return (TARGET_MS_BITFIELD_LAYOUT &&
17573 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17574 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17577 /* Returns an expression indicating where the this parameter is
17578 located on entry to the FUNCTION. */
17581 x86_this_parameter (tree function)
17583 tree type = TREE_TYPE (function);
17587 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17588 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17591 if (ix86_function_regparm (type, function) > 0)
17595 parm = TYPE_ARG_TYPES (type);
17596 /* Figure out whether or not the function has a variable number of
17598 for (; parm; parm = TREE_CHAIN (parm))
17599 if (TREE_VALUE (parm) == void_type_node)
17601 /* If not, the this parameter is in the first argument. */
17605 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17607 return gen_rtx_REG (SImode, regno);
17611 if (aggregate_value_p (TREE_TYPE (type), type))
17612 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17614 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17617 /* Determine whether x86_output_mi_thunk can succeed. */
17620 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17621 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17622 HOST_WIDE_INT vcall_offset, tree function)
17624 /* 64-bit can handle anything. */
17628 /* For 32-bit, everything's fine if we have one free register. */
17629 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17632 /* Need a free register for vcall_offset. */
17636 /* Need a free register for GOT references. */
17637 if (flag_pic && !(*targetm.binds_local_p) (function))
17640 /* Otherwise ok. */
17644 /* Output the assembler code for a thunk function. THUNK_DECL is the
17645 declaration for the thunk function itself, FUNCTION is the decl for
17646 the target function. DELTA is an immediate constant offset to be
17647 added to THIS. If VCALL_OFFSET is nonzero, the word at
17648 *(*this + vcall_offset) should be added to THIS. */
17651 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17652 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17653 HOST_WIDE_INT vcall_offset, tree function)
17656 rtx this = x86_this_parameter (function);
17659 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17660 pull it in now and let DELTA benefit. */
17663 else if (vcall_offset)
17665 /* Put the this parameter into %eax. */
17667 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17668 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17671 this_reg = NULL_RTX;
17673 /* Adjust the this parameter by a fixed constant. */
17676 xops[0] = GEN_INT (delta);
17677 xops[1] = this_reg ? this_reg : this;
17680 if (!x86_64_general_operand (xops[0], DImode))
17682 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17684 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17688 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17691 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17694 /* Adjust the this parameter by a value stored in the vtable. */
17698 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17701 int tmp_regno = 2 /* ECX */;
17702 if (lookup_attribute ("fastcall",
17703 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17704 tmp_regno = 0 /* EAX */;
17705 tmp = gen_rtx_REG (SImode, tmp_regno);
17708 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17711 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17713 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17715 /* Adjust the this parameter. */
17716 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17717 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17719 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17720 xops[0] = GEN_INT (vcall_offset);
17722 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17723 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17725 xops[1] = this_reg;
17727 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17729 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17732 /* If necessary, drop THIS back to its stack slot. */
17733 if (this_reg && this_reg != this)
17735 xops[0] = this_reg;
17737 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17740 xops[0] = XEXP (DECL_RTL (function), 0);
17743 if (!flag_pic || (*targetm.binds_local_p) (function))
17744 output_asm_insn ("jmp\t%P0", xops);
17747 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17748 tmp = gen_rtx_CONST (Pmode, tmp);
17749 tmp = gen_rtx_MEM (QImode, tmp);
17751 output_asm_insn ("jmp\t%A0", xops);
17756 if (!flag_pic || (*targetm.binds_local_p) (function))
17757 output_asm_insn ("jmp\t%P0", xops);
17762 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17763 tmp = (gen_rtx_SYMBOL_REF
17765 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17766 tmp = gen_rtx_MEM (QImode, tmp);
17768 output_asm_insn ("jmp\t%0", xops);
17771 #endif /* TARGET_MACHO */
17773 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17774 output_set_got (tmp, NULL_RTX);
17777 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17778 output_asm_insn ("jmp\t{*}%1", xops);
17784 x86_file_start (void)
17786 default_file_start ();
17788 darwin_file_start ();
17790 if (X86_FILE_START_VERSION_DIRECTIVE)
17791 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17792 if (X86_FILE_START_FLTUSED)
17793 fputs ("\t.global\t__fltused\n", asm_out_file);
17794 if (ix86_asm_dialect == ASM_INTEL)
17795 fputs ("\t.intel_syntax\n", asm_out_file);
17799 x86_field_alignment (tree field, int computed)
17801 enum machine_mode mode;
17802 tree type = TREE_TYPE (field);
17804 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17806 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17807 ? get_inner_array_type (type) : type);
17808 if (mode == DFmode || mode == DCmode
17809 || GET_MODE_CLASS (mode) == MODE_INT
17810 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17811 return MIN (32, computed);
17815 /* Output assembler code to FILE to increment profiler label # LABELNO
17816 for profiling a function entry. */
17818 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17823 #ifndef NO_PROFILE_COUNTERS
17824 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17826 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17830 #ifndef NO_PROFILE_COUNTERS
17831 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17833 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17837 #ifndef NO_PROFILE_COUNTERS
17838 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17839 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17841 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17845 #ifndef NO_PROFILE_COUNTERS
17846 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17847 PROFILE_COUNT_REGISTER);
17849 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17853 /* We don't have exact information about the insn sizes, but we may assume
17854 quite safely that we are informed about all 1 byte insns and memory
17855 address sizes. This is enough to eliminate unnecessary padding in
17859 min_insn_size (rtx insn)
17863 if (!INSN_P (insn) || !active_insn_p (insn))
17866 /* Discard alignments we've emit and jump instructions. */
17867 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17868 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17870 if (GET_CODE (insn) == JUMP_INSN
17871 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17872 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17875 /* Important case - calls are always 5 bytes.
17876 It is common to have many calls in the row. */
17877 if (GET_CODE (insn) == CALL_INSN
17878 && symbolic_reference_mentioned_p (PATTERN (insn))
17879 && !SIBLING_CALL_P (insn))
17881 if (get_attr_length (insn) <= 1)
17884 /* For normal instructions we may rely on the sizes of addresses
17885 and the presence of symbol to require 4 bytes of encoding.
17886 This is not the case for jumps where references are PC relative. */
17887 if (GET_CODE (insn) != JUMP_INSN)
17889 l = get_attr_length_address (insn);
17890 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17899 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
17903 ix86_avoid_jump_misspredicts (void)
17905 rtx insn, start = get_insns ();
17906 int nbytes = 0, njumps = 0;
17909 /* Look for all minimal intervals of instructions containing 4 jumps.
17910 The intervals are bounded by START and INSN. NBYTES is the total
17911 size of instructions in the interval including INSN and not including
17912 START. When the NBYTES is smaller than 16 bytes, it is possible
17913 that the end of START and INSN ends up in the same 16byte page.
17915 The smallest offset in the page INSN can start is the case where START
17916 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
17917 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
17919 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17922 nbytes += min_insn_size (insn);
17924 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17925 INSN_UID (insn), min_insn_size (insn));
17926 if ((GET_CODE (insn) == JUMP_INSN
17927 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17928 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17929 || GET_CODE (insn) == CALL_INSN)
17936 start = NEXT_INSN (start);
17937 if ((GET_CODE (start) == JUMP_INSN
17938 && GET_CODE (PATTERN (start)) != ADDR_VEC
17939 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17940 || GET_CODE (start) == CALL_INSN)
17941 njumps--, isjump = 1;
17944 nbytes -= min_insn_size (start);
17946 gcc_assert (njumps >= 0);
17948 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17949 INSN_UID (start), INSN_UID (insn), nbytes);
17951 if (njumps == 3 && isjump && nbytes < 16)
17953 int padsize = 15 - nbytes + min_insn_size (insn);
17956 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17957 INSN_UID (insn), padsize);
17958 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17963 /* AMD Athlon works faster
17964 when RET is not destination of conditional jump or directly preceded
17965 by other jump instruction. We avoid the penalty by inserting NOP just
17966 before the RET instructions in such cases. */
17968 ix86_pad_returns (void)
17973 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17975 basic_block bb = e->src;
17976 rtx ret = BB_END (bb);
17978 bool replace = false;
17980 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17981 || !maybe_hot_bb_p (bb))
17983 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17984 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17986 if (prev && GET_CODE (prev) == CODE_LABEL)
17991 FOR_EACH_EDGE (e, ei, bb->preds)
17992 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17993 && !(e->flags & EDGE_FALLTHRU))
17998 prev = prev_active_insn (ret);
18000 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18001 || GET_CODE (prev) == CALL_INSN))
18003 /* Empty functions get branch mispredict even when the jump destination
18004 is not visible to us. */
18005 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18010 emit_insn_before (gen_return_internal_long (), ret);
18016 /* Implement machine specific optimizations. We implement padding of returns
18017 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
18021 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18022 ix86_pad_returns ();
18023 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18024 ix86_avoid_jump_misspredicts ();
18027 /* Return nonzero when QImode register that must be represented via REX prefix
18030 x86_extended_QIreg_mentioned_p (rtx insn)
18033 extract_insn_cached (insn);
18034 for (i = 0; i < recog_data.n_operands; i++)
18035 if (REG_P (recog_data.operand[i])
18036 && REGNO (recog_data.operand[i]) >= 4)
18041 /* Return nonzero when P points to register encoded via REX prefix.
18042 Called via for_each_rtx. */
18044 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18046 unsigned int regno;
18049 regno = REGNO (*p);
18050 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18053 /* Return true when INSN mentions register that must be encoded using REX
18056 x86_extended_reg_mentioned_p (rtx insn)
18058 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18061 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18062 optabs would emit if we didn't have TFmode patterns. */
18065 x86_emit_floatuns (rtx operands[2])
18067 rtx neglab, donelab, i0, i1, f0, in, out;
18068 enum machine_mode mode, inmode;
18070 inmode = GET_MODE (operands[1]);
18071 gcc_assert (inmode == SImode || inmode == DImode);
18074 in = force_reg (inmode, operands[1]);
18075 mode = GET_MODE (out);
18076 neglab = gen_label_rtx ();
18077 donelab = gen_label_rtx ();
18078 i1 = gen_reg_rtx (Pmode);
18079 f0 = gen_reg_rtx (mode);
18081 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18083 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18084 emit_jump_insn (gen_jump (donelab));
18087 emit_label (neglab);
18089 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18090 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18091 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18092 expand_float (f0, i0, 0);
18093 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18095 emit_label (donelab);
18098 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18099 with all elements equal to VAR. Return true if successful. */
18102 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18103 rtx target, rtx val)
18105 enum machine_mode smode, wsmode, wvmode;
18120 val = force_reg (GET_MODE_INNER (mode), val);
18121 x = gen_rtx_VEC_DUPLICATE (mode, val);
18122 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18128 if (TARGET_SSE || TARGET_3DNOW_A)
18130 val = gen_lowpart (SImode, val);
18131 x = gen_rtx_TRUNCATE (HImode, val);
18132 x = gen_rtx_VEC_DUPLICATE (mode, x);
18133 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18155 /* Extend HImode to SImode using a paradoxical SUBREG. */
18156 tmp1 = gen_reg_rtx (SImode);
18157 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18158 /* Insert the SImode value as low element of V4SImode vector. */
18159 tmp2 = gen_reg_rtx (V4SImode);
18160 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18161 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18162 CONST0_RTX (V4SImode),
18164 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18165 /* Cast the V4SImode vector back to a V8HImode vector. */
18166 tmp1 = gen_reg_rtx (V8HImode);
18167 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18168 /* Duplicate the low short through the whole low SImode word. */
18169 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18170 /* Cast the V8HImode vector back to a V4SImode vector. */
18171 tmp2 = gen_reg_rtx (V4SImode);
18172 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18173 /* Replicate the low element of the V4SImode vector. */
18174 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18175 /* Cast the V2SImode back to V8HImode, and store in target. */
18176 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18187 /* Extend QImode to SImode using a paradoxical SUBREG. */
18188 tmp1 = gen_reg_rtx (SImode);
18189 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18190 /* Insert the SImode value as low element of V4SImode vector. */
18191 tmp2 = gen_reg_rtx (V4SImode);
18192 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18193 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18194 CONST0_RTX (V4SImode),
18196 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18197 /* Cast the V4SImode vector back to a V16QImode vector. */
18198 tmp1 = gen_reg_rtx (V16QImode);
18199 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18200 /* Duplicate the low byte through the whole low SImode word. */
18201 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18202 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18203 /* Cast the V16QImode vector back to a V4SImode vector. */
18204 tmp2 = gen_reg_rtx (V4SImode);
18205 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18206 /* Replicate the low element of the V4SImode vector. */
18207 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18208 /* Cast the V2SImode back to V16QImode, and store in target. */
18209 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18217 /* Replicate the value once into the next wider mode and recurse. */
18218 val = convert_modes (wsmode, smode, val, true);
18219 x = expand_simple_binop (wsmode, ASHIFT, val,
18220 GEN_INT (GET_MODE_BITSIZE (smode)),
18221 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18222 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18224 x = gen_reg_rtx (wvmode);
18225 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18226 gcc_unreachable ();
18227 emit_move_insn (target, gen_lowpart (mode, x));
18235 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18236 whose ONE_VAR element is VAR, and other elements are zero. Return true
18240 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18241 rtx target, rtx var, int one_var)
18243 enum machine_mode vsimode;
18259 var = force_reg (GET_MODE_INNER (mode), var);
18260 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18261 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18266 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18267 new_target = gen_reg_rtx (mode);
18269 new_target = target;
18270 var = force_reg (GET_MODE_INNER (mode), var);
18271 x = gen_rtx_VEC_DUPLICATE (mode, var);
18272 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18273 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18276 /* We need to shuffle the value to the correct position, so
18277 create a new pseudo to store the intermediate result. */
18279 /* With SSE2, we can use the integer shuffle insns. */
18280 if (mode != V4SFmode && TARGET_SSE2)
18282 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18284 GEN_INT (one_var == 1 ? 0 : 1),
18285 GEN_INT (one_var == 2 ? 0 : 1),
18286 GEN_INT (one_var == 3 ? 0 : 1)));
18287 if (target != new_target)
18288 emit_move_insn (target, new_target);
18292 /* Otherwise convert the intermediate result to V4SFmode and
18293 use the SSE1 shuffle instructions. */
18294 if (mode != V4SFmode)
18296 tmp = gen_reg_rtx (V4SFmode);
18297 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18302 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18304 GEN_INT (one_var == 1 ? 0 : 1),
18305 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18306 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18308 if (mode != V4SFmode)
18309 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18310 else if (tmp != target)
18311 emit_move_insn (target, tmp);
18313 else if (target != new_target)
18314 emit_move_insn (target, new_target);
18319 vsimode = V4SImode;
18325 vsimode = V2SImode;
18331 /* Zero extend the variable element to SImode and recurse. */
18332 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18334 x = gen_reg_rtx (vsimode);
18335 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18337 gcc_unreachable ();
18339 emit_move_insn (target, gen_lowpart (mode, x));
18347 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18348 consisting of the values in VALS. It is known that all elements
18349 except ONE_VAR are constants. Return true if successful. */
18352 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18353 rtx target, rtx vals, int one_var)
18355 rtx var = XVECEXP (vals, 0, one_var);
18356 enum machine_mode wmode;
18359 const_vec = copy_rtx (vals);
18360 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18361 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18369 /* For the two element vectors, it's just as easy to use
18370 the general case. */
18386 /* There's no way to set one QImode entry easily. Combine
18387 the variable value with its adjacent constant value, and
18388 promote to an HImode set. */
18389 x = XVECEXP (vals, 0, one_var ^ 1);
18392 var = convert_modes (HImode, QImode, var, true);
18393 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18394 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18395 x = GEN_INT (INTVAL (x) & 0xff);
18399 var = convert_modes (HImode, QImode, var, true);
18400 x = gen_int_mode (INTVAL (x) << 8, HImode);
18402 if (x != const0_rtx)
18403 var = expand_simple_binop (HImode, IOR, var, x, var,
18404 1, OPTAB_LIB_WIDEN);
18406 x = gen_reg_rtx (wmode);
18407 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18408 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18410 emit_move_insn (target, gen_lowpart (mode, x));
18417 emit_move_insn (target, const_vec);
18418 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18422 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18423 all values variable, and none identical. */
18426 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18427 rtx target, rtx vals)
18429 enum machine_mode half_mode = GET_MODE_INNER (mode);
18430 rtx op0 = NULL, op1 = NULL;
18431 bool use_vec_concat = false;
18437 if (!mmx_ok && !TARGET_SSE)
18443 /* For the two element vectors, we always implement VEC_CONCAT. */
18444 op0 = XVECEXP (vals, 0, 0);
18445 op1 = XVECEXP (vals, 0, 1);
18446 use_vec_concat = true;
18450 half_mode = V2SFmode;
18453 half_mode = V2SImode;
18459 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18460 Recurse to load the two halves. */
18462 op0 = gen_reg_rtx (half_mode);
18463 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18464 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18466 op1 = gen_reg_rtx (half_mode);
18467 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18468 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18470 use_vec_concat = true;
18481 gcc_unreachable ();
18484 if (use_vec_concat)
18486 if (!register_operand (op0, half_mode))
18487 op0 = force_reg (half_mode, op0);
18488 if (!register_operand (op1, half_mode))
18489 op1 = force_reg (half_mode, op1);
18491 emit_insn (gen_rtx_SET (VOIDmode, target,
18492 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18496 int i, j, n_elts, n_words, n_elt_per_word;
18497 enum machine_mode inner_mode;
18498 rtx words[4], shift;
18500 inner_mode = GET_MODE_INNER (mode);
18501 n_elts = GET_MODE_NUNITS (mode);
18502 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18503 n_elt_per_word = n_elts / n_words;
18504 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18506 for (i = 0; i < n_words; ++i)
18508 rtx word = NULL_RTX;
18510 for (j = 0; j < n_elt_per_word; ++j)
18512 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18513 elt = convert_modes (word_mode, inner_mode, elt, true);
18519 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18520 word, 1, OPTAB_LIB_WIDEN);
18521 word = expand_simple_binop (word_mode, IOR, word, elt,
18522 word, 1, OPTAB_LIB_WIDEN);
18530 emit_move_insn (target, gen_lowpart (mode, words[0]));
18531 else if (n_words == 2)
18533 rtx tmp = gen_reg_rtx (mode);
18534 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18535 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18536 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18537 emit_move_insn (target, tmp);
18539 else if (n_words == 4)
18541 rtx tmp = gen_reg_rtx (V4SImode);
18542 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18543 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18544 emit_move_insn (target, gen_lowpart (mode, tmp));
18547 gcc_unreachable ();
18551 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18552 instructions unless MMX_OK is true. */
18555 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18557 enum machine_mode mode = GET_MODE (target);
18558 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18559 int n_elts = GET_MODE_NUNITS (mode);
18560 int n_var = 0, one_var = -1;
18561 bool all_same = true, all_const_zero = true;
18565 for (i = 0; i < n_elts; ++i)
18567 x = XVECEXP (vals, 0, i);
18568 if (!CONSTANT_P (x))
18569 n_var++, one_var = i;
18570 else if (x != CONST0_RTX (inner_mode))
18571 all_const_zero = false;
18572 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18576 /* Constants are best loaded from the constant pool. */
18579 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18583 /* If all values are identical, broadcast the value. */
18585 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18586 XVECEXP (vals, 0, 0)))
18589 /* Values where only one field is non-constant are best loaded from
18590 the pool and overwritten via move later. */
18594 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18595 XVECEXP (vals, 0, one_var),
18599 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18603 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18607 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18609 enum machine_mode mode = GET_MODE (target);
18610 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18611 bool use_vec_merge = false;
18620 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18621 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18623 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18625 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18626 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18636 /* For the two element vectors, we implement a VEC_CONCAT with
18637 the extraction of the other element. */
18639 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18640 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18643 op0 = val, op1 = tmp;
18645 op0 = tmp, op1 = val;
18647 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18648 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18656 use_vec_merge = true;
18660 /* tmp = target = A B C D */
18661 tmp = copy_to_reg (target);
18662 /* target = A A B B */
18663 emit_insn (gen_sse_unpcklps (target, target, target));
18664 /* target = X A B B */
18665 ix86_expand_vector_set (false, target, val, 0);
18666 /* target = A X C D */
18667 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18668 GEN_INT (1), GEN_INT (0),
18669 GEN_INT (2+4), GEN_INT (3+4)));
18673 /* tmp = target = A B C D */
18674 tmp = copy_to_reg (target);
18675 /* tmp = X B C D */
18676 ix86_expand_vector_set (false, tmp, val, 0);
18677 /* target = A B X D */
18678 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18679 GEN_INT (0), GEN_INT (1),
18680 GEN_INT (0+4), GEN_INT (3+4)));
18684 /* tmp = target = A B C D */
18685 tmp = copy_to_reg (target);
18686 /* tmp = X B C D */
18687 ix86_expand_vector_set (false, tmp, val, 0);
18688 /* target = A B X D */
18689 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18690 GEN_INT (0), GEN_INT (1),
18691 GEN_INT (2+4), GEN_INT (0+4)));
18695 gcc_unreachable ();
18700 /* Element 0 handled by vec_merge below. */
18703 use_vec_merge = true;
18709 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18710 store into element 0, then shuffle them back. */
18714 order[0] = GEN_INT (elt);
18715 order[1] = const1_rtx;
18716 order[2] = const2_rtx;
18717 order[3] = GEN_INT (3);
18718 order[elt] = const0_rtx;
18720 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18721 order[1], order[2], order[3]));
18723 ix86_expand_vector_set (false, target, val, 0);
18725 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18726 order[1], order[2], order[3]));
18730 /* For SSE1, we have to reuse the V4SF code. */
18731 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18732 gen_lowpart (SFmode, val), elt);
18737 use_vec_merge = TARGET_SSE2;
18740 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18751 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18752 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18753 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18757 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18759 emit_move_insn (mem, target);
18761 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18762 emit_move_insn (tmp, val);
18764 emit_move_insn (target, mem);
18769 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18771 enum machine_mode mode = GET_MODE (vec);
18772 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18773 bool use_vec_extr = false;
18786 use_vec_extr = true;
18798 tmp = gen_reg_rtx (mode);
18799 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18800 GEN_INT (elt), GEN_INT (elt),
18801 GEN_INT (elt+4), GEN_INT (elt+4)));
18805 tmp = gen_reg_rtx (mode);
18806 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18810 gcc_unreachable ();
18813 use_vec_extr = true;
18828 tmp = gen_reg_rtx (mode);
18829 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18830 GEN_INT (elt), GEN_INT (elt),
18831 GEN_INT (elt), GEN_INT (elt)));
18835 tmp = gen_reg_rtx (mode);
18836 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18840 gcc_unreachable ();
18843 use_vec_extr = true;
18848 /* For SSE1, we have to reuse the V4SF code. */
18849 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18850 gen_lowpart (V4SFmode, vec), elt);
18856 use_vec_extr = TARGET_SSE2;
18859 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18864 /* ??? Could extract the appropriate HImode element and shift. */
18871 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18872 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18874 /* Let the rtl optimizers know about the zero extension performed. */
18875 if (inner_mode == HImode)
18877 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18878 target = gen_lowpart (SImode, target);
18881 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18885 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18887 emit_move_insn (mem, vec);
18889 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18890 emit_move_insn (target, tmp);
18894 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18895 pattern to reduce; DEST is the destination; IN is the input vector. */
18898 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18900 rtx tmp1, tmp2, tmp3;
18902 tmp1 = gen_reg_rtx (V4SFmode);
18903 tmp2 = gen_reg_rtx (V4SFmode);
18904 tmp3 = gen_reg_rtx (V4SFmode);
18906 emit_insn (gen_sse_movhlps (tmp1, in, in));
18907 emit_insn (fn (tmp2, tmp1, in));
18909 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18910 GEN_INT (1), GEN_INT (1),
18911 GEN_INT (1+4), GEN_INT (1+4)));
18912 emit_insn (fn (dest, tmp2, tmp3));
18915 /* Target hook for scalar_mode_supported_p. */
18917 ix86_scalar_mode_supported_p (enum machine_mode mode)
18919 if (DECIMAL_FLOAT_MODE_P (mode))
18922 return default_scalar_mode_supported_p (mode);
18925 /* Implements target hook vector_mode_supported_p. */
18927 ix86_vector_mode_supported_p (enum machine_mode mode)
18929 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18931 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18933 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18935 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18940 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18942 We do this in the new i386 backend to maintain source compatibility
18943 with the old cc0-based compiler. */
18946 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18947 tree inputs ATTRIBUTE_UNUSED,
18950 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18952 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18954 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18959 /* Return true if this goes in small data/bss. */
18962 ix86_in_large_data_p (tree exp)
18964 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18967 /* Functions are never large data. */
18968 if (TREE_CODE (exp) == FUNCTION_DECL)
18971 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18973 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18974 if (strcmp (section, ".ldata") == 0
18975 || strcmp (section, ".lbss") == 0)
18981 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18983 /* If this is an incomplete type with size 0, then we can't put it
18984 in data because it might be too big when completed. */
18985 if (!size || size > ix86_section_threshold)
18992 ix86_encode_section_info (tree decl, rtx rtl, int first)
18994 default_encode_section_info (decl, rtl, first);
18996 if (TREE_CODE (decl) == VAR_DECL
18997 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18998 && ix86_in_large_data_p (decl))
18999 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19002 /* Worker function for REVERSE_CONDITION. */
19005 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19007 return (mode != CCFPmode && mode != CCFPUmode
19008 ? reverse_condition (code)
19009 : reverse_condition_maybe_unordered (code));
19012 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19016 output_387_reg_move (rtx insn, rtx *operands)
19018 if (REG_P (operands[1])
19019 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19021 if (REGNO (operands[0]) == FIRST_STACK_REG)
19022 return output_387_ffreep (operands, 0);
19023 return "fstp\t%y0";
19025 if (STACK_TOP_P (operands[0]))
19026 return "fld%z1\t%y1";
19030 /* Output code to perform a conditional jump to LABEL, if C2 flag in
19031 FP status register is set. */
19034 ix86_emit_fp_unordered_jump (rtx label)
19036 rtx reg = gen_reg_rtx (HImode);
19039 emit_insn (gen_x86_fnstsw_1 (reg));
19041 if (TARGET_USE_SAHF)
19043 emit_insn (gen_x86_sahf_1 (reg));
19045 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19046 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19050 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19052 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19053 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19056 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19057 gen_rtx_LABEL_REF (VOIDmode, label),
19059 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19060 emit_jump_insn (temp);
19063 /* Output code to perform a log1p XFmode calculation. */
19065 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19067 rtx label1 = gen_label_rtx ();
19068 rtx label2 = gen_label_rtx ();
19070 rtx tmp = gen_reg_rtx (XFmode);
19071 rtx tmp2 = gen_reg_rtx (XFmode);
19073 emit_insn (gen_absxf2 (tmp, op1));
19074 emit_insn (gen_cmpxf (tmp,
19075 CONST_DOUBLE_FROM_REAL_VALUE (
19076 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19078 emit_jump_insn (gen_bge (label1));
19080 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19081 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19082 emit_jump (label2);
19084 emit_label (label1);
19085 emit_move_insn (tmp, CONST1_RTX (XFmode));
19086 emit_insn (gen_addxf3 (tmp, op1, tmp));
19087 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19088 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19090 emit_label (label2);
19093 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19096 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19099 /* With Binutils 2.15, the "@unwind" marker must be specified on
19100 every occurrence of the ".eh_frame" section, not just the first
19103 && strcmp (name, ".eh_frame") == 0)
19105 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19106 flags & SECTION_WRITE ? "aw" : "a");
19109 default_elf_asm_named_section (name, flags, decl);
19112 /* Return the mangling of TYPE if it is an extended fundamental type. */
19114 static const char *
19115 ix86_mangle_fundamental_type (tree type)
19117 switch (TYPE_MODE (type))
19120 /* __float128 is "g". */
19123 /* "long double" or __float80 is "e". */
19130 /* For 32-bit code we can save PIC register setup by using
19131 __stack_chk_fail_local hidden function instead of calling
19132 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
19133 register, so it is better to call __stack_chk_fail directly. */
19136 ix86_stack_protect_fail (void)
19138 return TARGET_64BIT
19139 ? default_external_stack_protect_fail ()
19140 : default_hidden_stack_protect_fail ();
19143 /* Select a format to encode pointers in exception handling data. CODE
19144 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19145 true if the symbol may be affected by dynamic relocations.
19147 ??? All x86 object file formats are capable of representing this.
19148 After all, the relocation needed is the same as for the call insn.
19149 Whether or not a particular assembler allows us to enter such, I
19150 guess we'll have to see. */
19152 asm_preferred_eh_data_format (int code, int global)
19156 int type = DW_EH_PE_sdata8;
19158 || ix86_cmodel == CM_SMALL_PIC
19159 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19160 type = DW_EH_PE_sdata4;
19161 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19163 if (ix86_cmodel == CM_SMALL
19164 || (ix86_cmodel == CM_MEDIUM && code))
19165 return DW_EH_PE_udata4;
19166 return DW_EH_PE_absptr;
19169 #include "gt-i386.h"