1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
53 #ifndef CHECK_STACK_LIMIT
54 #define CHECK_STACK_LIMIT (-1)
57 /* Return index of given mode in mult and division cost tables. */
58 #define MODE_INDEX(mode) \
59 ((mode) == QImode ? 0 \
60 : (mode) == HImode ? 1 \
61 : (mode) == SImode ? 2 \
62 : (mode) == DImode ? 3 \
65 /* Processor costs (relative to an add) */
66 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
67 #define COSTS_N_BYTES(N) ((N) * 2)
70 struct processor_costs size_cost = { /* costs for tunning for size */
71 COSTS_N_BYTES (2), /* cost of an add instruction */
72 COSTS_N_BYTES (3), /* cost of a lea instruction */
73 COSTS_N_BYTES (2), /* variable shift costs */
74 COSTS_N_BYTES (3), /* constant shift costs */
75 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
76 COSTS_N_BYTES (3), /* HI */
77 COSTS_N_BYTES (3), /* SI */
78 COSTS_N_BYTES (3), /* DI */
79 COSTS_N_BYTES (5)}, /* other */
80 0, /* cost of multiply per each bit set */
81 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 COSTS_N_BYTES (3), /* cost of movsx */
87 COSTS_N_BYTES (3), /* cost of movzx */
90 2, /* cost for loading QImode using movzbl */
91 {2, 2, 2}, /* cost of loading integer registers
92 in QImode, HImode and SImode.
93 Relative to reg-reg move (2). */
94 {2, 2, 2}, /* cost of storing integer registers */
95 2, /* cost of reg,reg fld/fst */
96 {2, 2, 2}, /* cost of loading fp registers
97 in SFmode, DFmode and XFmode */
98 {2, 2, 2}, /* cost of loading integer registers */
99 3, /* cost of moving MMX register */
100 {3, 3}, /* cost of loading MMX registers
101 in SImode and DImode */
102 {3, 3}, /* cost of storing MMX registers
103 in SImode and DImode */
104 3, /* cost of moving SSE register */
105 {3, 3, 3}, /* cost of loading SSE registers
106 in SImode, DImode and TImode */
107 {3, 3, 3}, /* cost of storing SSE registers
108 in SImode, DImode and TImode */
109 3, /* MMX or SSE register to integer */
110 0, /* size of prefetch block */
111 0, /* number of parallel prefetches */
113 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
114 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
115 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
116 COSTS_N_BYTES (2), /* cost of FABS instruction. */
117 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
118 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
121 /* Processor costs (relative to an add) */
123 struct processor_costs i386_cost = { /* 386 specific costs */
124 COSTS_N_INSNS (1), /* cost of an add instruction */
125 COSTS_N_INSNS (1), /* cost of a lea instruction */
126 COSTS_N_INSNS (3), /* variable shift costs */
127 COSTS_N_INSNS (2), /* constant shift costs */
128 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
129 COSTS_N_INSNS (6), /* HI */
130 COSTS_N_INSNS (6), /* SI */
131 COSTS_N_INSNS (6), /* DI */
132 COSTS_N_INSNS (6)}, /* other */
133 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
134 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
135 COSTS_N_INSNS (23), /* HI */
136 COSTS_N_INSNS (23), /* SI */
137 COSTS_N_INSNS (23), /* DI */
138 COSTS_N_INSNS (23)}, /* other */
139 COSTS_N_INSNS (3), /* cost of movsx */
140 COSTS_N_INSNS (2), /* cost of movzx */
141 15, /* "large" insn */
143 4, /* cost for loading QImode using movzbl */
144 {2, 4, 2}, /* cost of loading integer registers
145 in QImode, HImode and SImode.
146 Relative to reg-reg move (2). */
147 {2, 4, 2}, /* cost of storing integer registers */
148 2, /* cost of reg,reg fld/fst */
149 {8, 8, 8}, /* cost of loading fp registers
150 in SFmode, DFmode and XFmode */
151 {8, 8, 8}, /* cost of loading integer registers */
152 2, /* cost of moving MMX register */
153 {4, 8}, /* cost of loading MMX registers
154 in SImode and DImode */
155 {4, 8}, /* cost of storing MMX registers
156 in SImode and DImode */
157 2, /* cost of moving SSE register */
158 {4, 8, 16}, /* cost of loading SSE registers
159 in SImode, DImode and TImode */
160 {4, 8, 16}, /* cost of storing SSE registers
161 in SImode, DImode and TImode */
162 3, /* MMX or SSE register to integer */
163 0, /* size of prefetch block */
164 0, /* number of parallel prefetches */
166 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
167 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
168 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
169 COSTS_N_INSNS (22), /* cost of FABS instruction. */
170 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
171 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
175 struct processor_costs i486_cost = { /* 486 specific costs */
176 COSTS_N_INSNS (1), /* cost of an add instruction */
177 COSTS_N_INSNS (1), /* cost of a lea instruction */
178 COSTS_N_INSNS (3), /* variable shift costs */
179 COSTS_N_INSNS (2), /* constant shift costs */
180 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
181 COSTS_N_INSNS (12), /* HI */
182 COSTS_N_INSNS (12), /* SI */
183 COSTS_N_INSNS (12), /* DI */
184 COSTS_N_INSNS (12)}, /* other */
185 1, /* cost of multiply per each bit set */
186 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
187 COSTS_N_INSNS (40), /* HI */
188 COSTS_N_INSNS (40), /* SI */
189 COSTS_N_INSNS (40), /* DI */
190 COSTS_N_INSNS (40)}, /* other */
191 COSTS_N_INSNS (3), /* cost of movsx */
192 COSTS_N_INSNS (2), /* cost of movzx */
193 15, /* "large" insn */
195 4, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {8, 8, 8}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {8, 8, 8}, /* cost of loading integer registers */
204 2, /* cost of moving MMX register */
205 {4, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {4, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
218 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
219 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
220 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
221 COSTS_N_INSNS (3), /* cost of FABS instruction. */
222 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
223 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
227 struct processor_costs pentium_cost = {
228 COSTS_N_INSNS (1), /* cost of an add instruction */
229 COSTS_N_INSNS (1), /* cost of a lea instruction */
230 COSTS_N_INSNS (4), /* variable shift costs */
231 COSTS_N_INSNS (1), /* constant shift costs */
232 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
233 COSTS_N_INSNS (11), /* HI */
234 COSTS_N_INSNS (11), /* SI */
235 COSTS_N_INSNS (11), /* DI */
236 COSTS_N_INSNS (11)}, /* other */
237 0, /* cost of multiply per each bit set */
238 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
239 COSTS_N_INSNS (25), /* HI */
240 COSTS_N_INSNS (25), /* SI */
241 COSTS_N_INSNS (25), /* DI */
242 COSTS_N_INSNS (25)}, /* other */
243 COSTS_N_INSNS (3), /* cost of movsx */
244 COSTS_N_INSNS (2), /* cost of movzx */
245 8, /* "large" insn */
247 6, /* cost for loading QImode using movzbl */
248 {2, 4, 2}, /* cost of loading integer registers
249 in QImode, HImode and SImode.
250 Relative to reg-reg move (2). */
251 {2, 4, 2}, /* cost of storing integer registers */
252 2, /* cost of reg,reg fld/fst */
253 {2, 2, 6}, /* cost of loading fp registers
254 in SFmode, DFmode and XFmode */
255 {4, 4, 6}, /* cost of loading integer registers */
256 8, /* cost of moving MMX register */
257 {8, 8}, /* cost of loading MMX registers
258 in SImode and DImode */
259 {8, 8}, /* cost of storing MMX registers
260 in SImode and DImode */
261 2, /* cost of moving SSE register */
262 {4, 8, 16}, /* cost of loading SSE registers
263 in SImode, DImode and TImode */
264 {4, 8, 16}, /* cost of storing SSE registers
265 in SImode, DImode and TImode */
266 3, /* MMX or SSE register to integer */
267 0, /* size of prefetch block */
268 0, /* number of parallel prefetches */
270 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
271 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
272 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
273 COSTS_N_INSNS (1), /* cost of FABS instruction. */
274 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
275 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
279 struct processor_costs pentiumpro_cost = {
280 COSTS_N_INSNS (1), /* cost of an add instruction */
281 COSTS_N_INSNS (1), /* cost of a lea instruction */
282 COSTS_N_INSNS (1), /* variable shift costs */
283 COSTS_N_INSNS (1), /* constant shift costs */
284 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
285 COSTS_N_INSNS (4), /* HI */
286 COSTS_N_INSNS (4), /* SI */
287 COSTS_N_INSNS (4), /* DI */
288 COSTS_N_INSNS (4)}, /* other */
289 0, /* cost of multiply per each bit set */
290 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
291 COSTS_N_INSNS (17), /* HI */
292 COSTS_N_INSNS (17), /* SI */
293 COSTS_N_INSNS (17), /* DI */
294 COSTS_N_INSNS (17)}, /* other */
295 COSTS_N_INSNS (1), /* cost of movsx */
296 COSTS_N_INSNS (1), /* cost of movzx */
297 8, /* "large" insn */
299 2, /* cost for loading QImode using movzbl */
300 {4, 4, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 2, 2}, /* cost of storing integer registers */
304 2, /* cost of reg,reg fld/fst */
305 {2, 2, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 6}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 3, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 6, /* number of parallel prefetches */
322 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
323 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
324 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
325 COSTS_N_INSNS (2), /* cost of FABS instruction. */
326 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
327 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
331 struct processor_costs k6_cost = {
332 COSTS_N_INSNS (1), /* cost of an add instruction */
333 COSTS_N_INSNS (2), /* cost of a lea instruction */
334 COSTS_N_INSNS (1), /* variable shift costs */
335 COSTS_N_INSNS (1), /* constant shift costs */
336 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
337 COSTS_N_INSNS (3), /* HI */
338 COSTS_N_INSNS (3), /* SI */
339 COSTS_N_INSNS (3), /* DI */
340 COSTS_N_INSNS (3)}, /* other */
341 0, /* cost of multiply per each bit set */
342 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
343 COSTS_N_INSNS (18), /* HI */
344 COSTS_N_INSNS (18), /* SI */
345 COSTS_N_INSNS (18), /* DI */
346 COSTS_N_INSNS (18)}, /* other */
347 COSTS_N_INSNS (2), /* cost of movsx */
348 COSTS_N_INSNS (2), /* cost of movzx */
349 8, /* "large" insn */
351 3, /* cost for loading QImode using movzbl */
352 {4, 5, 4}, /* cost of loading integer registers
353 in QImode, HImode and SImode.
354 Relative to reg-reg move (2). */
355 {2, 3, 2}, /* cost of storing integer registers */
356 4, /* cost of reg,reg fld/fst */
357 {6, 6, 6}, /* cost of loading fp registers
358 in SFmode, DFmode and XFmode */
359 {4, 4, 4}, /* cost of loading integer registers */
360 2, /* cost of moving MMX register */
361 {2, 2}, /* cost of loading MMX registers
362 in SImode and DImode */
363 {2, 2}, /* cost of storing MMX registers
364 in SImode and DImode */
365 2, /* cost of moving SSE register */
366 {2, 2, 8}, /* cost of loading SSE registers
367 in SImode, DImode and TImode */
368 {2, 2, 8}, /* cost of storing SSE registers
369 in SImode, DImode and TImode */
370 6, /* MMX or SSE register to integer */
371 32, /* size of prefetch block */
372 1, /* number of parallel prefetches */
374 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
375 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
376 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
377 COSTS_N_INSNS (2), /* cost of FABS instruction. */
378 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
379 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
383 struct processor_costs athlon_cost = {
384 COSTS_N_INSNS (1), /* cost of an add instruction */
385 COSTS_N_INSNS (2), /* cost of a lea instruction */
386 COSTS_N_INSNS (1), /* variable shift costs */
387 COSTS_N_INSNS (1), /* constant shift costs */
388 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
389 COSTS_N_INSNS (5), /* HI */
390 COSTS_N_INSNS (5), /* SI */
391 COSTS_N_INSNS (5), /* DI */
392 COSTS_N_INSNS (5)}, /* other */
393 0, /* cost of multiply per each bit set */
394 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
395 COSTS_N_INSNS (26), /* HI */
396 COSTS_N_INSNS (42), /* SI */
397 COSTS_N_INSNS (74), /* DI */
398 COSTS_N_INSNS (74)}, /* other */
399 COSTS_N_INSNS (1), /* cost of movsx */
400 COSTS_N_INSNS (1), /* cost of movzx */
401 8, /* "large" insn */
403 4, /* cost for loading QImode using movzbl */
404 {3, 4, 3}, /* cost of loading integer registers
405 in QImode, HImode and SImode.
406 Relative to reg-reg move (2). */
407 {3, 4, 3}, /* cost of storing integer registers */
408 4, /* cost of reg,reg fld/fst */
409 {4, 4, 12}, /* cost of loading fp registers
410 in SFmode, DFmode and XFmode */
411 {6, 6, 8}, /* cost of loading integer registers */
412 2, /* cost of moving MMX register */
413 {4, 4}, /* cost of loading MMX registers
414 in SImode and DImode */
415 {4, 4}, /* cost of storing MMX registers
416 in SImode and DImode */
417 2, /* cost of moving SSE register */
418 {4, 4, 6}, /* cost of loading SSE registers
419 in SImode, DImode and TImode */
420 {4, 4, 5}, /* cost of storing SSE registers
421 in SImode, DImode and TImode */
422 5, /* MMX or SSE register to integer */
423 64, /* size of prefetch block */
424 6, /* number of parallel prefetches */
426 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
427 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
428 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
429 COSTS_N_INSNS (2), /* cost of FABS instruction. */
430 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
431 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
435 struct processor_costs k8_cost = {
436 COSTS_N_INSNS (1), /* cost of an add instruction */
437 COSTS_N_INSNS (2), /* cost of a lea instruction */
438 COSTS_N_INSNS (1), /* variable shift costs */
439 COSTS_N_INSNS (1), /* constant shift costs */
440 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
441 COSTS_N_INSNS (4), /* HI */
442 COSTS_N_INSNS (3), /* SI */
443 COSTS_N_INSNS (4), /* DI */
444 COSTS_N_INSNS (5)}, /* other */
445 0, /* cost of multiply per each bit set */
446 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
447 COSTS_N_INSNS (26), /* HI */
448 COSTS_N_INSNS (42), /* SI */
449 COSTS_N_INSNS (74), /* DI */
450 COSTS_N_INSNS (74)}, /* other */
451 COSTS_N_INSNS (1), /* cost of movsx */
452 COSTS_N_INSNS (1), /* cost of movzx */
453 8, /* "large" insn */
455 4, /* cost for loading QImode using movzbl */
456 {3, 4, 3}, /* cost of loading integer registers
457 in QImode, HImode and SImode.
458 Relative to reg-reg move (2). */
459 {3, 4, 3}, /* cost of storing integer registers */
460 4, /* cost of reg,reg fld/fst */
461 {4, 4, 12}, /* cost of loading fp registers
462 in SFmode, DFmode and XFmode */
463 {6, 6, 8}, /* cost of loading integer registers */
464 2, /* cost of moving MMX register */
465 {3, 3}, /* cost of loading MMX registers
466 in SImode and DImode */
467 {4, 4}, /* cost of storing MMX registers
468 in SImode and DImode */
469 2, /* cost of moving SSE register */
470 {4, 3, 6}, /* cost of loading SSE registers
471 in SImode, DImode and TImode */
472 {4, 4, 5}, /* cost of storing SSE registers
473 in SImode, DImode and TImode */
474 5, /* MMX or SSE register to integer */
475 64, /* size of prefetch block */
476 6, /* number of parallel prefetches */
478 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
479 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
480 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
481 COSTS_N_INSNS (2), /* cost of FABS instruction. */
482 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
483 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
487 struct processor_costs pentium4_cost = {
488 COSTS_N_INSNS (1), /* cost of an add instruction */
489 COSTS_N_INSNS (3), /* cost of a lea instruction */
490 COSTS_N_INSNS (4), /* variable shift costs */
491 COSTS_N_INSNS (4), /* constant shift costs */
492 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
493 COSTS_N_INSNS (15), /* HI */
494 COSTS_N_INSNS (15), /* SI */
495 COSTS_N_INSNS (15), /* DI */
496 COSTS_N_INSNS (15)}, /* other */
497 0, /* cost of multiply per each bit set */
498 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
499 COSTS_N_INSNS (56), /* HI */
500 COSTS_N_INSNS (56), /* SI */
501 COSTS_N_INSNS (56), /* DI */
502 COSTS_N_INSNS (56)}, /* other */
503 COSTS_N_INSNS (1), /* cost of movsx */
504 COSTS_N_INSNS (1), /* cost of movzx */
505 16, /* "large" insn */
507 2, /* cost for loading QImode using movzbl */
508 {4, 5, 4}, /* cost of loading integer registers
509 in QImode, HImode and SImode.
510 Relative to reg-reg move (2). */
511 {2, 3, 2}, /* cost of storing integer registers */
512 2, /* cost of reg,reg fld/fst */
513 {2, 2, 6}, /* cost of loading fp registers
514 in SFmode, DFmode and XFmode */
515 {4, 4, 6}, /* cost of loading integer registers */
516 2, /* cost of moving MMX register */
517 {2, 2}, /* cost of loading MMX registers
518 in SImode and DImode */
519 {2, 2}, /* cost of storing MMX registers
520 in SImode and DImode */
521 12, /* cost of moving SSE register */
522 {12, 12, 12}, /* cost of loading SSE registers
523 in SImode, DImode and TImode */
524 {2, 2, 8}, /* cost of storing SSE registers
525 in SImode, DImode and TImode */
526 10, /* MMX or SSE register to integer */
527 64, /* size of prefetch block */
528 6, /* number of parallel prefetches */
530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
539 struct processor_costs nocona_cost = {
540 COSTS_N_INSNS (1), /* cost of an add instruction */
541 COSTS_N_INSNS (1), /* cost of a lea instruction */
542 COSTS_N_INSNS (1), /* variable shift costs */
543 COSTS_N_INSNS (1), /* constant shift costs */
544 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
545 COSTS_N_INSNS (10), /* HI */
546 COSTS_N_INSNS (10), /* SI */
547 COSTS_N_INSNS (10), /* DI */
548 COSTS_N_INSNS (10)}, /* other */
549 0, /* cost of multiply per each bit set */
550 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
551 COSTS_N_INSNS (66), /* HI */
552 COSTS_N_INSNS (66), /* SI */
553 COSTS_N_INSNS (66), /* DI */
554 COSTS_N_INSNS (66)}, /* other */
555 COSTS_N_INSNS (1), /* cost of movsx */
556 COSTS_N_INSNS (1), /* cost of movzx */
557 16, /* "large" insn */
559 4, /* cost for loading QImode using movzbl */
560 {4, 4, 4}, /* cost of loading integer registers
561 in QImode, HImode and SImode.
562 Relative to reg-reg move (2). */
563 {4, 4, 4}, /* cost of storing integer registers */
564 3, /* cost of reg,reg fld/fst */
565 {12, 12, 12}, /* cost of loading fp registers
566 in SFmode, DFmode and XFmode */
567 {4, 4, 4}, /* cost of loading integer registers */
568 6, /* cost of moving MMX register */
569 {12, 12}, /* cost of loading MMX registers
570 in SImode and DImode */
571 {12, 12}, /* cost of storing MMX registers
572 in SImode and DImode */
573 6, /* cost of moving SSE register */
574 {12, 12, 12}, /* cost of loading SSE registers
575 in SImode, DImode and TImode */
576 {12, 12, 12}, /* cost of storing SSE registers
577 in SImode, DImode and TImode */
578 8, /* MMX or SSE register to integer */
579 128, /* size of prefetch block */
580 8, /* number of parallel prefetches */
582 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
583 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
584 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
585 COSTS_N_INSNS (3), /* cost of FABS instruction. */
586 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
587 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
590 /* Generic64 should produce code tuned for Nocona and K8. */
592 struct processor_costs generic64_cost = {
593 COSTS_N_INSNS (1), /* cost of an add instruction */
594 /* On all chips taken into consideration lea is 2 cycles and more. With
595 this cost however our current implementation of synth_mult results in
596 use of unnecesary temporary registers causing regression on several
597 SPECfp benchmarks. */
598 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
599 COSTS_N_INSNS (1), /* variable shift costs */
600 COSTS_N_INSNS (1), /* constant shift costs */
601 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
602 COSTS_N_INSNS (4), /* HI */
603 COSTS_N_INSNS (3), /* SI */
604 COSTS_N_INSNS (4), /* DI */
605 COSTS_N_INSNS (2)}, /* other */
606 0, /* cost of multiply per each bit set */
607 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
608 COSTS_N_INSNS (26), /* HI */
609 COSTS_N_INSNS (42), /* SI */
610 COSTS_N_INSNS (74), /* DI */
611 COSTS_N_INSNS (74)}, /* other */
612 COSTS_N_INSNS (1), /* cost of movsx */
613 COSTS_N_INSNS (1), /* cost of movzx */
614 8, /* "large" insn */
616 4, /* cost for loading QImode using movzbl */
617 {4, 4, 4}, /* cost of loading integer registers
618 in QImode, HImode and SImode.
619 Relative to reg-reg move (2). */
620 {4, 4, 4}, /* cost of storing integer registers */
621 4, /* cost of reg,reg fld/fst */
622 {12, 12, 12}, /* cost of loading fp registers
623 in SFmode, DFmode and XFmode */
624 {6, 6, 8}, /* cost of loading integer registers */
625 2, /* cost of moving MMX register */
626 {8, 8}, /* cost of loading MMX registers
627 in SImode and DImode */
628 {8, 8}, /* cost of storing MMX registers
629 in SImode and DImode */
630 2, /* cost of moving SSE register */
631 {8, 8, 8}, /* cost of loading SSE registers
632 in SImode, DImode and TImode */
633 {8, 8, 8}, /* cost of storing SSE registers
634 in SImode, DImode and TImode */
635 5, /* MMX or SSE register to integer */
636 64, /* size of prefetch block */
637 6, /* number of parallel prefetches */
638 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
639 is increased to perhaps more appropriate value of 5. */
641 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
642 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
643 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
644 COSTS_N_INSNS (8), /* cost of FABS instruction. */
645 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
646 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
649 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
651 struct processor_costs generic32_cost = {
652 COSTS_N_INSNS (1), /* cost of an add instruction */
653 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
654 COSTS_N_INSNS (1), /* variable shift costs */
655 COSTS_N_INSNS (1), /* constant shift costs */
656 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
657 COSTS_N_INSNS (4), /* HI */
658 COSTS_N_INSNS (3), /* SI */
659 COSTS_N_INSNS (4), /* DI */
660 COSTS_N_INSNS (2)}, /* other */
661 0, /* cost of multiply per each bit set */
662 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
663 COSTS_N_INSNS (26), /* HI */
664 COSTS_N_INSNS (42), /* SI */
665 COSTS_N_INSNS (74), /* DI */
666 COSTS_N_INSNS (74)}, /* other */
667 COSTS_N_INSNS (1), /* cost of movsx */
668 COSTS_N_INSNS (1), /* cost of movzx */
669 8, /* "large" insn */
671 4, /* cost for loading QImode using movzbl */
672 {4, 4, 4}, /* cost of loading integer registers
673 in QImode, HImode and SImode.
674 Relative to reg-reg move (2). */
675 {4, 4, 4}, /* cost of storing integer registers */
676 4, /* cost of reg,reg fld/fst */
677 {12, 12, 12}, /* cost of loading fp registers
678 in SFmode, DFmode and XFmode */
679 {6, 6, 8}, /* cost of loading integer registers */
680 2, /* cost of moving MMX register */
681 {8, 8}, /* cost of loading MMX registers
682 in SImode and DImode */
683 {8, 8}, /* cost of storing MMX registers
684 in SImode and DImode */
685 2, /* cost of moving SSE register */
686 {8, 8, 8}, /* cost of loading SSE registers
687 in SImode, DImode and TImode */
688 {8, 8, 8}, /* cost of storing SSE registers
689 in SImode, DImode and TImode */
690 5, /* MMX or SSE register to integer */
691 64, /* size of prefetch block */
692 6, /* number of parallel prefetches */
694 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
695 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
696 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
697 COSTS_N_INSNS (8), /* cost of FABS instruction. */
698 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
699 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
702 const struct processor_costs *ix86_cost = &pentium_cost;
704 /* Processor feature/optimization bitmasks. */
705 #define m_386 (1<<PROCESSOR_I386)
706 #define m_486 (1<<PROCESSOR_I486)
707 #define m_PENT (1<<PROCESSOR_PENTIUM)
708 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
709 #define m_K6 (1<<PROCESSOR_K6)
710 #define m_ATHLON (1<<PROCESSOR_ATHLON)
711 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
712 #define m_K8 (1<<PROCESSOR_K8)
713 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
714 #define m_NOCONA (1<<PROCESSOR_NOCONA)
715 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
716 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
717 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
719 /* Generic instruction choice should be common subset of supported CPUs
720 (PPro/PENT4/NOCONA/Athlon/K8). */
722 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
723 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
724 generic because it is not working well with PPro base chips. */
725 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
726 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
727 const int x86_zero_extend_with_and = m_486 | m_PENT;
728 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
729 const int x86_double_with_add = ~m_386;
730 const int x86_use_bit_test = m_386;
731 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
732 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
733 const int x86_fisttp = m_NOCONA;
734 const int x86_3dnow_a = m_ATHLON_K8;
735 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
736 /* Branch hints were put in P4 based on simulation result. But
737 after P4 was made, no performance benefit was observed with
738 branch hints. It also increases the code size. As the result,
739 icc never generates branch hints. */
740 const int x86_branch_hints = 0;
741 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
742 /* We probably ought to watch for partial register stalls on Generic32
743 compilation setting as well. However in current implementation the
744 partial register stalls are not eliminated very well - they can
745 be introduced via subregs synthetized by combine and can happen
746 in caller/callee saving sequences.
747 Because this option pays back little on PPro based chips and is in conflict
748 with partial reg. dependencies used by Athlon/P4 based chips, it is better
749 to leave it off for generic32 for now. */
750 const int x86_partial_reg_stall = m_PPRO;
751 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
752 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
753 const int x86_use_mov0 = m_K6;
754 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
755 const int x86_read_modify_write = ~m_PENT;
756 const int x86_read_modify = ~(m_PENT | m_PPRO);
757 const int x86_split_long_moves = m_PPRO;
758 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
759 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
760 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
761 const int x86_qimode_math = ~(0);
762 const int x86_promote_qi_regs = 0;
763 /* On PPro this flag is meant to avoid partial register stalls. Just like
764 the x86_partial_reg_stall this option might be considered for Generic32
765 if our scheme for avoiding partial stalls was more effective. */
766 const int x86_himode_math = ~(m_PPRO);
767 const int x86_promote_hi_regs = m_PPRO;
768 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
769 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
770 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
771 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
772 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
773 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
774 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
775 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
776 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
777 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
778 const int x86_shift1 = ~m_486;
779 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
780 /* In Generic model we have an confict here in between PPro/Pentium4 based chips
781 that thread 128bit SSE registers as single units versus K8 based chips that
782 divide SSE registers to two 64bit halves.
783 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
784 to allow register renaming on 128bit SSE units, but usually results in one
785 extra microop on 64bit SSE units. Experimental results shows that disabling
786 this option on P4 brings over 20% SPECfp regression, while enabling it on
787 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
789 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
790 /* Set for machines where the type and dependencies are resolved on SSE
791 register parts instead of whole registers, so we may maintain just
792 lower part of scalar values in proper format leaving the upper part
794 const int x86_sse_split_regs = m_ATHLON_K8;
795 const int x86_sse_typeless_stores = m_ATHLON_K8;
796 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
797 const int x86_use_ffreep = m_ATHLON_K8;
798 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
799 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
801 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
802 integer data in xmm registers. Which results in pretty abysmal code. */
803 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
805 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
806 /* Some CPU cores are not able to predict more than 4 branch instructions in
807 the 16 byte window. */
808 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
809 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
810 const int x86_use_bt = m_ATHLON_K8;
811 /* Compare and exchange was added for 80486. */
812 const int x86_cmpxchg = ~m_386;
813 /* Compare and exchange 8 bytes was added for pentium. */
814 const int x86_cmpxchg8b = ~(m_386 | m_486);
815 /* Compare and exchange 16 bytes was added for nocona. */
816 const int x86_cmpxchg16b = m_NOCONA;
817 /* Exchange and add was added for 80486. */
818 const int x86_xadd = ~m_386;
819 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
821 /* In case the average insn count for single function invocation is
822 lower than this constant, emit fast (but longer) prologue and
824 #define FAST_PROLOGUE_INSN_COUNT 20
826 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
827 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
828 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
829 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
831 /* Array of the smallest class containing reg number REGNO, indexed by
832 REGNO. Used by REGNO_REG_CLASS in i386.h. */
834 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
837 AREG, DREG, CREG, BREG,
839 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
841 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
842 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
845 /* flags, fpsr, dirflag, frame */
846 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
847 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
849 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
851 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
852 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
853 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
857 /* The "default" register map used in 32bit mode. */
859 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
861 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
862 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
863 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
864 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
865 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
866 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
867 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
870 static int const x86_64_int_parameter_registers[6] =
872 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
873 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
876 static int const x86_64_int_return_registers[4] =
878 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
881 /* The "default" register map used in 64bit mode. */
882 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
884 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
885 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
886 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
887 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
888 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
889 8,9,10,11,12,13,14,15, /* extended integer registers */
890 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
893 /* Define the register numbers to be used in Dwarf debugging information.
894 The SVR4 reference port C compiler uses the following register numbers
895 in its Dwarf output code:
896 0 for %eax (gcc regno = 0)
897 1 for %ecx (gcc regno = 2)
898 2 for %edx (gcc regno = 1)
899 3 for %ebx (gcc regno = 3)
900 4 for %esp (gcc regno = 7)
901 5 for %ebp (gcc regno = 6)
902 6 for %esi (gcc regno = 4)
903 7 for %edi (gcc regno = 5)
904 The following three DWARF register numbers are never generated by
905 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
906 believes these numbers have these meanings.
907 8 for %eip (no gcc equivalent)
908 9 for %eflags (gcc regno = 17)
909 10 for %trapno (no gcc equivalent)
910 It is not at all clear how we should number the FP stack registers
911 for the x86 architecture. If the version of SDB on x86/svr4 were
912 a bit less brain dead with respect to floating-point then we would
913 have a precedent to follow with respect to DWARF register numbers
914 for x86 FP registers, but the SDB on x86/svr4 is so completely
915 broken with respect to FP registers that it is hardly worth thinking
916 of it as something to strive for compatibility with.
917 The version of x86/svr4 SDB I have at the moment does (partially)
918 seem to believe that DWARF register number 11 is associated with
919 the x86 register %st(0), but that's about all. Higher DWARF
920 register numbers don't seem to be associated with anything in
921 particular, and even for DWARF regno 11, SDB only seems to under-
922 stand that it should say that a variable lives in %st(0) (when
923 asked via an `=' command) if we said it was in DWARF regno 11,
924 but SDB still prints garbage when asked for the value of the
925 variable in question (via a `/' command).
926 (Also note that the labels SDB prints for various FP stack regs
927 when doing an `x' command are all wrong.)
928 Note that these problems generally don't affect the native SVR4
929 C compiler because it doesn't allow the use of -O with -g and
930 because when it is *not* optimizing, it allocates a memory
931 location for each floating-point variable, and the memory
932 location is what gets described in the DWARF AT_location
933 attribute for the variable in question.
934 Regardless of the severe mental illness of the x86/svr4 SDB, we
935 do something sensible here and we use the following DWARF
936 register numbers. Note that these are all stack-top-relative
938 11 for %st(0) (gcc regno = 8)
939 12 for %st(1) (gcc regno = 9)
940 13 for %st(2) (gcc regno = 10)
941 14 for %st(3) (gcc regno = 11)
942 15 for %st(4) (gcc regno = 12)
943 16 for %st(5) (gcc regno = 13)
944 17 for %st(6) (gcc regno = 14)
945 18 for %st(7) (gcc regno = 15)
947 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
949 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
950 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
951 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
952 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
953 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
954 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
955 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
958 /* Test and compare insns in i386.md store the information needed to
959 generate branch and scc insns here. */
961 rtx ix86_compare_op0 = NULL_RTX;
962 rtx ix86_compare_op1 = NULL_RTX;
963 rtx ix86_compare_emitted = NULL_RTX;
965 /* Size of the register save area. */
966 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
968 /* Define the structure for the machine field in struct function. */
970 struct stack_local_entry GTY(())
975 struct stack_local_entry *next;
978 /* Structure describing stack frame layout.
979 Stack grows downward:
985 saved frame pointer if frame_pointer_needed
986 <- HARD_FRAME_POINTER
992 > to_allocate <- FRAME_POINTER
1002 HOST_WIDE_INT frame;
1004 int outgoing_arguments_size;
1007 HOST_WIDE_INT to_allocate;
1008 /* The offsets relative to ARG_POINTER. */
1009 HOST_WIDE_INT frame_pointer_offset;
1010 HOST_WIDE_INT hard_frame_pointer_offset;
1011 HOST_WIDE_INT stack_pointer_offset;
1013 /* When save_regs_using_mov is set, emit prologue using
1014 move instead of push instructions. */
1015 bool save_regs_using_mov;
1018 /* Code model option. */
1019 enum cmodel ix86_cmodel;
1021 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1023 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1025 /* Which unit we are generating floating point math for. */
1026 enum fpmath_unit ix86_fpmath;
1028 /* Which cpu are we scheduling for. */
1029 enum processor_type ix86_tune;
1030 /* Which instruction set architecture to use. */
1031 enum processor_type ix86_arch;
1033 /* true if sse prefetch instruction is not NOOP. */
1034 int x86_prefetch_sse;
1036 /* ix86_regparm_string as a number */
1037 static int ix86_regparm;
1039 /* Preferred alignment for stack boundary in bits. */
1040 unsigned int ix86_preferred_stack_boundary;
1042 /* Values 1-5: see jump.c */
1043 int ix86_branch_cost;
1045 /* Variables which are this size or smaller are put in the data/bss
1046 or ldata/lbss sections. */
1048 int ix86_section_threshold = 65536;
1050 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1051 char internal_label_prefix[16];
1052 int internal_label_prefix_len;
1054 /* Table for BUILT_IN_NORMAL to BUILT_IN_MD mapping. */
1055 static GTY(()) tree ix86_builtin_function_variants[(int) END_BUILTINS];
1057 static bool ix86_handle_option (size_t, const char *, int);
1058 static void output_pic_addr_const (FILE *, rtx, int);
1059 static void put_condition_code (enum rtx_code, enum machine_mode,
1061 static const char *get_some_local_dynamic_name (void);
1062 static int get_some_local_dynamic_name_1 (rtx *, void *);
1063 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1064 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1066 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1067 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1069 static rtx get_thread_pointer (int);
1070 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1071 static void get_pc_thunk_name (char [32], unsigned int);
1072 static rtx gen_push (rtx);
1073 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
1074 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
1075 static struct machine_function * ix86_init_machine_status (void);
1076 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1077 static int ix86_nsaved_regs (void);
1078 static void ix86_emit_save_regs (void);
1079 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1080 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1081 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1082 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1083 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1084 static rtx ix86_expand_aligntest (rtx, int);
1085 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1086 static int ix86_issue_rate (void);
1087 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1088 static int ia32_multipass_dfa_lookahead (void);
1089 static void ix86_init_mmx_sse_builtins (void);
1090 static void ix86_init_sse_abi_builtins (void);
1091 static rtx x86_this_parameter (tree);
1092 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1093 HOST_WIDE_INT, tree);
1094 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1095 static void x86_file_start (void);
1096 static void ix86_reorg (void);
1097 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1098 static tree ix86_build_builtin_va_list (void);
1099 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1101 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1102 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1103 static bool ix86_vector_mode_supported_p (enum machine_mode);
1105 static int ix86_address_cost (rtx);
1106 static bool ix86_cannot_force_const_mem (rtx);
1107 static rtx ix86_delegitimize_address (rtx);
1109 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1111 struct builtin_description;
1112 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1114 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1116 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1117 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1118 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1119 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1120 static rtx safe_vector_operand (rtx, enum machine_mode);
1121 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1122 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1123 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1124 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1125 static int ix86_fp_comparison_cost (enum rtx_code code);
1126 static unsigned int ix86_select_alt_pic_regnum (void);
1127 static int ix86_save_reg (unsigned int, int);
1128 static void ix86_compute_frame_layout (struct ix86_frame *);
1129 static int ix86_comp_type_attributes (tree, tree);
1130 static int ix86_function_regparm (tree, tree);
1131 const struct attribute_spec ix86_attribute_table[];
1132 static bool ix86_function_ok_for_sibcall (tree, tree);
1133 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1134 static int ix86_value_regno (enum machine_mode, tree, tree);
1135 static bool contains_128bit_aligned_vector_p (tree);
1136 static rtx ix86_struct_value_rtx (tree, int);
1137 static bool ix86_ms_bitfield_layout_p (tree);
1138 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1139 static int extended_reg_mentioned_1 (rtx *, void *);
1140 static bool ix86_rtx_costs (rtx, int, int, int *);
1141 static int min_insn_size (rtx);
1142 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1143 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1144 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1146 static void ix86_init_builtins (void);
1147 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1148 static rtx ix86_expand_library_builtin (tree, rtx, rtx, enum machine_mode, int);
1149 static const char *ix86_mangle_fundamental_type (tree);
1150 static tree ix86_stack_protect_fail (void);
1151 static rtx ix86_internal_arg_pointer (void);
1152 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1154 /* This function is only used on Solaris. */
1155 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1158 /* Register class used for passing given 64bit part of the argument.
1159 These represent classes as documented by the PS ABI, with the exception
1160 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1161 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1163 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1164 whenever possible (upper half does contain padding).
1166 enum x86_64_reg_class
1169 X86_64_INTEGER_CLASS,
1170 X86_64_INTEGERSI_CLASS,
1177 X86_64_COMPLEX_X87_CLASS,
1180 static const char * const x86_64_reg_class_name[] = {
1181 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1182 "sseup", "x87", "x87up", "cplx87", "no"
1185 #define MAX_CLASSES 4
1187 /* Table of constants used by fldpi, fldln2, etc.... */
1188 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1189 static bool ext_80387_constants_init = 0;
1190 static void init_ext_80387_constants (void);
1191 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1192 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1193 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1194 static section *x86_64_elf_select_section (tree decl, int reloc,
1195 unsigned HOST_WIDE_INT align)
1198 /* Initialize the GCC target structure. */
1199 #undef TARGET_ATTRIBUTE_TABLE
1200 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1201 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1202 # undef TARGET_MERGE_DECL_ATTRIBUTES
1203 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1206 #undef TARGET_COMP_TYPE_ATTRIBUTES
1207 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1209 #undef TARGET_INIT_BUILTINS
1210 #define TARGET_INIT_BUILTINS ix86_init_builtins
1211 #undef TARGET_EXPAND_BUILTIN
1212 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1213 #undef TARGET_EXPAND_LIBRARY_BUILTIN
1214 #define TARGET_EXPAND_LIBRARY_BUILTIN ix86_expand_library_builtin
1216 #undef TARGET_ASM_FUNCTION_EPILOGUE
1217 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1219 #undef TARGET_ENCODE_SECTION_INFO
1220 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1221 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1223 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1226 #undef TARGET_ASM_OPEN_PAREN
1227 #define TARGET_ASM_OPEN_PAREN ""
1228 #undef TARGET_ASM_CLOSE_PAREN
1229 #define TARGET_ASM_CLOSE_PAREN ""
1231 #undef TARGET_ASM_ALIGNED_HI_OP
1232 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1233 #undef TARGET_ASM_ALIGNED_SI_OP
1234 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1236 #undef TARGET_ASM_ALIGNED_DI_OP
1237 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1240 #undef TARGET_ASM_UNALIGNED_HI_OP
1241 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1242 #undef TARGET_ASM_UNALIGNED_SI_OP
1243 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1244 #undef TARGET_ASM_UNALIGNED_DI_OP
1245 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1247 #undef TARGET_SCHED_ADJUST_COST
1248 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1249 #undef TARGET_SCHED_ISSUE_RATE
1250 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1251 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1252 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1253 ia32_multipass_dfa_lookahead
1255 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1256 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1259 #undef TARGET_HAVE_TLS
1260 #define TARGET_HAVE_TLS true
1262 #undef TARGET_CANNOT_FORCE_CONST_MEM
1263 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1265 #undef TARGET_DELEGITIMIZE_ADDRESS
1266 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1268 #undef TARGET_MS_BITFIELD_LAYOUT_P
1269 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1272 #undef TARGET_BINDS_LOCAL_P
1273 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1276 #undef TARGET_ASM_OUTPUT_MI_THUNK
1277 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1278 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1279 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1281 #undef TARGET_ASM_FILE_START
1282 #define TARGET_ASM_FILE_START x86_file_start
1284 #undef TARGET_DEFAULT_TARGET_FLAGS
1285 #define TARGET_DEFAULT_TARGET_FLAGS \
1287 | TARGET_64BIT_DEFAULT \
1288 | TARGET_SUBTARGET_DEFAULT \
1289 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1291 #undef TARGET_HANDLE_OPTION
1292 #define TARGET_HANDLE_OPTION ix86_handle_option
1294 #undef TARGET_RTX_COSTS
1295 #define TARGET_RTX_COSTS ix86_rtx_costs
1296 #undef TARGET_ADDRESS_COST
1297 #define TARGET_ADDRESS_COST ix86_address_cost
1299 #undef TARGET_FIXED_CONDITION_CODE_REGS
1300 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1301 #undef TARGET_CC_MODES_COMPATIBLE
1302 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1304 #undef TARGET_MACHINE_DEPENDENT_REORG
1305 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1307 #undef TARGET_BUILD_BUILTIN_VA_LIST
1308 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1310 #undef TARGET_MD_ASM_CLOBBERS
1311 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1313 #undef TARGET_PROMOTE_PROTOTYPES
1314 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1315 #undef TARGET_STRUCT_VALUE_RTX
1316 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1317 #undef TARGET_SETUP_INCOMING_VARARGS
1318 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1319 #undef TARGET_MUST_PASS_IN_STACK
1320 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1321 #undef TARGET_PASS_BY_REFERENCE
1322 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1323 #undef TARGET_INTERNAL_ARG_POINTER
1324 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1325 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1326 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1328 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1329 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1331 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1332 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1334 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1335 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1338 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1339 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1342 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1343 #undef TARGET_INSERT_ATTRIBUTES
1344 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1347 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1348 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1350 #undef TARGET_STACK_PROTECT_FAIL
1351 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1353 #undef TARGET_FUNCTION_VALUE
1354 #define TARGET_FUNCTION_VALUE ix86_function_value
1356 struct gcc_target targetm = TARGET_INITIALIZER;
1359 /* The svr4 ABI for the i386 says that records and unions are returned
1361 #ifndef DEFAULT_PCC_STRUCT_RETURN
1362 #define DEFAULT_PCC_STRUCT_RETURN 1
1365 /* Implement TARGET_HANDLE_OPTION. */
1368 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1375 target_flags &= ~MASK_3DNOW_A;
1376 target_flags_explicit |= MASK_3DNOW_A;
1383 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1384 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1391 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1392 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1399 target_flags &= ~MASK_SSE3;
1400 target_flags_explicit |= MASK_SSE3;
1409 /* Sometimes certain combinations of command options do not make
1410 sense on a particular target machine. You can define a macro
1411 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1412 defined, is executed once just after all the command options have
1415 Don't use this macro to turn on various extra optimizations for
1416 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1419 override_options (void)
1422 int ix86_tune_defaulted = 0;
1424 /* Comes from final.c -- no real reason to change it. */
1425 #define MAX_CODE_ALIGN 16
1429 const struct processor_costs *cost; /* Processor costs */
1430 const int target_enable; /* Target flags to enable. */
1431 const int target_disable; /* Target flags to disable. */
1432 const int align_loop; /* Default alignments. */
1433 const int align_loop_max_skip;
1434 const int align_jump;
1435 const int align_jump_max_skip;
1436 const int align_func;
1438 const processor_target_table[PROCESSOR_max] =
1440 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1441 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1442 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1443 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1444 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1445 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1446 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1447 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1448 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1449 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1450 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1453 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1456 const char *const name; /* processor name or nickname. */
1457 const enum processor_type processor;
1458 const enum pta_flags
1464 PTA_PREFETCH_SSE = 16,
1470 const processor_alias_table[] =
1472 {"i386", PROCESSOR_I386, 0},
1473 {"i486", PROCESSOR_I486, 0},
1474 {"i586", PROCESSOR_PENTIUM, 0},
1475 {"pentium", PROCESSOR_PENTIUM, 0},
1476 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1477 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1478 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1479 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1480 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1481 {"i686", PROCESSOR_PENTIUMPRO, 0},
1482 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1483 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1484 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1485 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1486 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1487 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1488 | PTA_MMX | PTA_PREFETCH_SSE},
1489 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1490 | PTA_MMX | PTA_PREFETCH_SSE},
1491 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1492 | PTA_MMX | PTA_PREFETCH_SSE},
1493 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1494 | PTA_MMX | PTA_PREFETCH_SSE},
1495 {"k6", PROCESSOR_K6, PTA_MMX},
1496 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1497 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1498 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1500 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1501 | PTA_3DNOW | PTA_3DNOW_A},
1502 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1503 | PTA_3DNOW_A | PTA_SSE},
1504 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1505 | PTA_3DNOW_A | PTA_SSE},
1506 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1507 | PTA_3DNOW_A | PTA_SSE},
1508 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1509 | PTA_SSE | PTA_SSE2 },
1510 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1511 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1512 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1513 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1514 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1515 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1516 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1517 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1518 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1519 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1522 int const pta_size = ARRAY_SIZE (processor_alias_table);
1524 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1525 SUBTARGET_OVERRIDE_OPTIONS;
1528 /* Set the default values for switches whose default depends on TARGET_64BIT
1529 in case they weren't overwritten by command line options. */
1532 if (flag_omit_frame_pointer == 2)
1533 flag_omit_frame_pointer = 1;
1534 if (flag_asynchronous_unwind_tables == 2)
1535 flag_asynchronous_unwind_tables = 1;
1536 if (flag_pcc_struct_return == 2)
1537 flag_pcc_struct_return = 0;
1541 if (flag_omit_frame_pointer == 2)
1542 flag_omit_frame_pointer = 0;
1543 if (flag_asynchronous_unwind_tables == 2)
1544 flag_asynchronous_unwind_tables = 0;
1545 if (flag_pcc_struct_return == 2)
1546 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1549 /* Need to check -mtune=generic first. */
1550 if (ix86_tune_string)
1552 if (!strcmp (ix86_tune_string, "generic")
1553 || !strcmp (ix86_tune_string, "i686"))
1556 ix86_tune_string = "generic64";
1558 ix86_tune_string = "generic32";
1560 else if (!strncmp (ix86_tune_string, "generic", 7))
1561 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1565 if (ix86_arch_string)
1566 ix86_tune_string = ix86_arch_string;
1567 if (!ix86_tune_string)
1569 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1570 ix86_tune_defaulted = 1;
1573 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1574 need to use a sensible tune option. */
1575 if (!strcmp (ix86_tune_string, "generic")
1576 || !strcmp (ix86_tune_string, "x86-64")
1577 || !strcmp (ix86_tune_string, "i686"))
1580 ix86_tune_string = "generic64";
1582 ix86_tune_string = "generic32";
1585 if (!strcmp (ix86_tune_string, "x86-64"))
1586 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1587 "-mtune=generic instead as appropriate.");
1589 if (!ix86_arch_string)
1590 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1591 if (!strcmp (ix86_arch_string, "generic"))
1592 error ("generic CPU can be used only for -mtune= switch");
1593 if (!strncmp (ix86_arch_string, "generic", 7))
1594 error ("bad value (%s) for -march= switch", ix86_arch_string);
1596 if (ix86_cmodel_string != 0)
1598 if (!strcmp (ix86_cmodel_string, "small"))
1599 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1600 else if (!strcmp (ix86_cmodel_string, "medium"))
1601 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1603 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1604 else if (!strcmp (ix86_cmodel_string, "32"))
1605 ix86_cmodel = CM_32;
1606 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1607 ix86_cmodel = CM_KERNEL;
1608 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1609 ix86_cmodel = CM_LARGE;
1611 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1615 ix86_cmodel = CM_32;
1617 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1619 if (ix86_asm_string != 0)
1622 && !strcmp (ix86_asm_string, "intel"))
1623 ix86_asm_dialect = ASM_INTEL;
1624 else if (!strcmp (ix86_asm_string, "att"))
1625 ix86_asm_dialect = ASM_ATT;
1627 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1629 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1630 error ("code model %qs not supported in the %s bit mode",
1631 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1632 if (ix86_cmodel == CM_LARGE)
1633 sorry ("code model %<large%> not supported yet");
1634 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1635 sorry ("%i-bit mode not compiled in",
1636 (target_flags & MASK_64BIT) ? 64 : 32);
1638 for (i = 0; i < pta_size; i++)
1639 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1641 ix86_arch = processor_alias_table[i].processor;
1642 /* Default cpu tuning to the architecture. */
1643 ix86_tune = ix86_arch;
1644 if (processor_alias_table[i].flags & PTA_MMX
1645 && !(target_flags_explicit & MASK_MMX))
1646 target_flags |= MASK_MMX;
1647 if (processor_alias_table[i].flags & PTA_3DNOW
1648 && !(target_flags_explicit & MASK_3DNOW))
1649 target_flags |= MASK_3DNOW;
1650 if (processor_alias_table[i].flags & PTA_3DNOW_A
1651 && !(target_flags_explicit & MASK_3DNOW_A))
1652 target_flags |= MASK_3DNOW_A;
1653 if (processor_alias_table[i].flags & PTA_SSE
1654 && !(target_flags_explicit & MASK_SSE))
1655 target_flags |= MASK_SSE;
1656 if (processor_alias_table[i].flags & PTA_SSE2
1657 && !(target_flags_explicit & MASK_SSE2))
1658 target_flags |= MASK_SSE2;
1659 if (processor_alias_table[i].flags & PTA_SSE3
1660 && !(target_flags_explicit & MASK_SSE3))
1661 target_flags |= MASK_SSE3;
1662 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1663 x86_prefetch_sse = true;
1664 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1665 error ("CPU you selected does not support x86-64 "
1671 error ("bad value (%s) for -march= switch", ix86_arch_string);
1673 for (i = 0; i < pta_size; i++)
1674 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1676 ix86_tune = processor_alias_table[i].processor;
1677 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1679 if (ix86_tune_defaulted)
1681 ix86_tune_string = "x86-64";
1682 for (i = 0; i < pta_size; i++)
1683 if (! strcmp (ix86_tune_string,
1684 processor_alias_table[i].name))
1686 ix86_tune = processor_alias_table[i].processor;
1689 error ("CPU you selected does not support x86-64 "
1692 /* Intel CPUs have always interpreted SSE prefetch instructions as
1693 NOPs; so, we can enable SSE prefetch instructions even when
1694 -mtune (rather than -march) points us to a processor that has them.
1695 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1696 higher processors. */
1697 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1698 x86_prefetch_sse = true;
1702 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1705 ix86_cost = &size_cost;
1707 ix86_cost = processor_target_table[ix86_tune].cost;
1708 target_flags |= processor_target_table[ix86_tune].target_enable;
1709 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1711 /* Arrange to set up i386_stack_locals for all functions. */
1712 init_machine_status = ix86_init_machine_status;
1714 /* Validate -mregparm= value. */
1715 if (ix86_regparm_string)
1717 i = atoi (ix86_regparm_string);
1718 if (i < 0 || i > REGPARM_MAX)
1719 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1725 ix86_regparm = REGPARM_MAX;
1727 /* If the user has provided any of the -malign-* options,
1728 warn and use that value only if -falign-* is not set.
1729 Remove this code in GCC 3.2 or later. */
1730 if (ix86_align_loops_string)
1732 warning (0, "-malign-loops is obsolete, use -falign-loops");
1733 if (align_loops == 0)
1735 i = atoi (ix86_align_loops_string);
1736 if (i < 0 || i > MAX_CODE_ALIGN)
1737 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1739 align_loops = 1 << i;
1743 if (ix86_align_jumps_string)
1745 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1746 if (align_jumps == 0)
1748 i = atoi (ix86_align_jumps_string);
1749 if (i < 0 || i > MAX_CODE_ALIGN)
1750 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1752 align_jumps = 1 << i;
1756 if (ix86_align_funcs_string)
1758 warning (0, "-malign-functions is obsolete, use -falign-functions");
1759 if (align_functions == 0)
1761 i = atoi (ix86_align_funcs_string);
1762 if (i < 0 || i > MAX_CODE_ALIGN)
1763 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1765 align_functions = 1 << i;
1769 /* Default align_* from the processor table. */
1770 if (align_loops == 0)
1772 align_loops = processor_target_table[ix86_tune].align_loop;
1773 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1775 if (align_jumps == 0)
1777 align_jumps = processor_target_table[ix86_tune].align_jump;
1778 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1780 if (align_functions == 0)
1782 align_functions = processor_target_table[ix86_tune].align_func;
1785 /* Validate -mpreferred-stack-boundary= value, or provide default.
1786 The default of 128 bits is for Pentium III's SSE __m128, but we
1787 don't want additional code to keep the stack aligned when
1788 optimizing for code size. */
1789 ix86_preferred_stack_boundary = ((TARGET_64BIT || TARGET_MACHO || !optimize_size)
1791 if (ix86_preferred_stack_boundary_string)
1793 i = atoi (ix86_preferred_stack_boundary_string);
1794 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1795 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1796 TARGET_64BIT ? 4 : 2);
1798 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1801 /* Validate -mbranch-cost= value, or provide default. */
1802 ix86_branch_cost = ix86_cost->branch_cost;
1803 if (ix86_branch_cost_string)
1805 i = atoi (ix86_branch_cost_string);
1807 error ("-mbranch-cost=%d is not between 0 and 5", i);
1809 ix86_branch_cost = i;
1811 if (ix86_section_threshold_string)
1813 i = atoi (ix86_section_threshold_string);
1815 error ("-mlarge-data-threshold=%d is negative", i);
1817 ix86_section_threshold = i;
1820 if (ix86_tls_dialect_string)
1822 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1823 ix86_tls_dialect = TLS_DIALECT_GNU;
1824 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1825 ix86_tls_dialect = TLS_DIALECT_GNU2;
1826 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1827 ix86_tls_dialect = TLS_DIALECT_SUN;
1829 error ("bad value (%s) for -mtls-dialect= switch",
1830 ix86_tls_dialect_string);
1833 /* Keep nonleaf frame pointers. */
1834 if (flag_omit_frame_pointer)
1835 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1836 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1837 flag_omit_frame_pointer = 1;
1839 /* If we're doing fast math, we don't care about comparison order
1840 wrt NaNs. This lets us use a shorter comparison sequence. */
1841 if (flag_unsafe_math_optimizations)
1842 target_flags &= ~MASK_IEEE_FP;
1844 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1845 since the insns won't need emulation. */
1846 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1847 target_flags &= ~MASK_NO_FANCY_MATH_387;
1849 /* Likewise, if the target doesn't have a 387, or we've specified
1850 software floating point, don't use 387 inline intrinsics. */
1852 target_flags |= MASK_NO_FANCY_MATH_387;
1854 /* Turn on SSE2 builtins for -msse3. */
1856 target_flags |= MASK_SSE2;
1858 /* Turn on SSE builtins for -msse2. */
1860 target_flags |= MASK_SSE;
1862 /* Turn on MMX builtins for -msse. */
1865 target_flags |= MASK_MMX & ~target_flags_explicit;
1866 x86_prefetch_sse = true;
1869 /* Turn on MMX builtins for 3Dnow. */
1871 target_flags |= MASK_MMX;
1875 if (TARGET_ALIGN_DOUBLE)
1876 error ("-malign-double makes no sense in the 64bit mode");
1878 error ("-mrtd calling convention not supported in the 64bit mode");
1880 /* Enable by default the SSE and MMX builtins. Do allow the user to
1881 explicitly disable any of these. In particular, disabling SSE and
1882 MMX for kernel code is extremely useful. */
1884 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1885 & ~target_flags_explicit);
1889 /* i386 ABI does not specify red zone. It still makes sense to use it
1890 when programmer takes care to stack from being destroyed. */
1891 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1892 target_flags |= MASK_NO_RED_ZONE;
1895 /* Accept -msseregparm only if at least SSE support is enabled. */
1896 if (TARGET_SSEREGPARM
1898 error ("-msseregparm used without SSE enabled");
1900 /* Accept -msselibm only if at least SSE support is enabled. */
1903 error ("-msselibm used without SSE2 enabled");
1905 /* Ignore -msselibm on 64bit targets. */
1908 error ("-msselibm used on a 64bit target");
1910 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1912 if (ix86_fpmath_string != 0)
1914 if (! strcmp (ix86_fpmath_string, "387"))
1915 ix86_fpmath = FPMATH_387;
1916 else if (! strcmp (ix86_fpmath_string, "sse"))
1920 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1921 ix86_fpmath = FPMATH_387;
1924 ix86_fpmath = FPMATH_SSE;
1926 else if (! strcmp (ix86_fpmath_string, "387,sse")
1927 || ! strcmp (ix86_fpmath_string, "sse,387"))
1931 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1932 ix86_fpmath = FPMATH_387;
1934 else if (!TARGET_80387)
1936 warning (0, "387 instruction set disabled, using SSE arithmetics");
1937 ix86_fpmath = FPMATH_SSE;
1940 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1943 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1946 /* If the i387 is disabled, then do not return values in it. */
1948 target_flags &= ~MASK_FLOAT_RETURNS;
1950 if ((x86_accumulate_outgoing_args & TUNEMASK)
1951 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1953 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1955 /* ??? Unwind info is not correct around the CFG unless either a frame
1956 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1957 unwind info generation to be aware of the CFG and propagating states
1959 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1960 || flag_exceptions || flag_non_call_exceptions)
1961 && flag_omit_frame_pointer
1962 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1964 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1965 warning (0, "unwind tables currently require either a frame pointer "
1966 "or -maccumulate-outgoing-args for correctness");
1967 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1970 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1973 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1974 p = strchr (internal_label_prefix, 'X');
1975 internal_label_prefix_len = p - internal_label_prefix;
1979 /* When scheduling description is not available, disable scheduler pass
1980 so it won't slow down the compilation and make x87 code slower. */
1981 if (!TARGET_SCHEDULE)
1982 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1985 /* switch to the appropriate section for output of DECL.
1986 DECL is either a `VAR_DECL' node or a constant of some sort.
1987 RELOC indicates whether forming the initial value of DECL requires
1988 link-time relocations. */
1991 x86_64_elf_select_section (tree decl, int reloc,
1992 unsigned HOST_WIDE_INT align)
1994 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1995 && ix86_in_large_data_p (decl))
1997 const char *sname = NULL;
1998 unsigned int flags = SECTION_WRITE;
1999 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2004 case SECCAT_DATA_REL:
2005 sname = ".ldata.rel";
2007 case SECCAT_DATA_REL_LOCAL:
2008 sname = ".ldata.rel.local";
2010 case SECCAT_DATA_REL_RO:
2011 sname = ".ldata.rel.ro";
2013 case SECCAT_DATA_REL_RO_LOCAL:
2014 sname = ".ldata.rel.ro.local";
2018 flags |= SECTION_BSS;
2021 case SECCAT_RODATA_MERGE_STR:
2022 case SECCAT_RODATA_MERGE_STR_INIT:
2023 case SECCAT_RODATA_MERGE_CONST:
2027 case SECCAT_SRODATA:
2034 /* We don't split these for medium model. Place them into
2035 default sections and hope for best. */
2040 /* We might get called with string constants, but get_named_section
2041 doesn't like them as they are not DECLs. Also, we need to set
2042 flags in that case. */
2044 return get_section (sname, flags, NULL);
2045 return get_named_section (decl, sname, reloc);
2048 return default_elf_select_section (decl, reloc, align);
2051 /* Build up a unique section name, expressed as a
2052 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2053 RELOC indicates whether the initial value of EXP requires
2054 link-time relocations. */
2057 x86_64_elf_unique_section (tree decl, int reloc)
2059 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2060 && ix86_in_large_data_p (decl))
2062 const char *prefix = NULL;
2063 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2064 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2066 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2069 case SECCAT_DATA_REL:
2070 case SECCAT_DATA_REL_LOCAL:
2071 case SECCAT_DATA_REL_RO:
2072 case SECCAT_DATA_REL_RO_LOCAL:
2073 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2076 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2079 case SECCAT_RODATA_MERGE_STR:
2080 case SECCAT_RODATA_MERGE_STR_INIT:
2081 case SECCAT_RODATA_MERGE_CONST:
2082 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2084 case SECCAT_SRODATA:
2091 /* We don't split these for medium model. Place them into
2092 default sections and hope for best. */
2100 plen = strlen (prefix);
2102 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2103 name = targetm.strip_name_encoding (name);
2104 nlen = strlen (name);
2106 string = alloca (nlen + plen + 1);
2107 memcpy (string, prefix, plen);
2108 memcpy (string + plen, name, nlen + 1);
2110 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2114 default_unique_section (decl, reloc);
2117 #ifdef COMMON_ASM_OP
2118 /* This says how to output assembler code to declare an
2119 uninitialized external linkage data object.
2121 For medium model x86-64 we need to use .largecomm opcode for
2124 x86_elf_aligned_common (FILE *file,
2125 const char *name, unsigned HOST_WIDE_INT size,
2128 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2129 && size > (unsigned int)ix86_section_threshold)
2130 fprintf (file, ".largecomm\t");
2132 fprintf (file, "%s", COMMON_ASM_OP);
2133 assemble_name (file, name);
2134 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2135 size, align / BITS_PER_UNIT);
2138 /* Utility function for targets to use in implementing
2139 ASM_OUTPUT_ALIGNED_BSS. */
2142 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2143 const char *name, unsigned HOST_WIDE_INT size,
2146 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2147 && size > (unsigned int)ix86_section_threshold)
2148 switch_to_section (get_named_section (decl, ".lbss", 0));
2150 switch_to_section (bss_section);
2151 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2152 #ifdef ASM_DECLARE_OBJECT_NAME
2153 last_assemble_variable_decl = decl;
2154 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2156 /* Standard thing is just output label for the object. */
2157 ASM_OUTPUT_LABEL (file, name);
2158 #endif /* ASM_DECLARE_OBJECT_NAME */
2159 ASM_OUTPUT_SKIP (file, size ? size : 1);
2164 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2166 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2167 make the problem with not enough registers even worse. */
2168 #ifdef INSN_SCHEDULING
2170 flag_schedule_insns = 0;
2174 /* The Darwin libraries never set errno, so we might as well
2175 avoid calling them when that's the only reason we would. */
2176 flag_errno_math = 0;
2178 /* The default values of these switches depend on the TARGET_64BIT
2179 that is not known at this moment. Mark these values with 2 and
2180 let user the to override these. In case there is no command line option
2181 specifying them, we will set the defaults in override_options. */
2183 flag_omit_frame_pointer = 2;
2184 flag_pcc_struct_return = 2;
2185 flag_asynchronous_unwind_tables = 2;
2186 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2187 SUBTARGET_OPTIMIZATION_OPTIONS;
2191 /* Table of valid machine attributes. */
2192 const struct attribute_spec ix86_attribute_table[] =
2194 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2195 /* Stdcall attribute says callee is responsible for popping arguments
2196 if they are not variable. */
2197 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2198 /* Fastcall attribute says callee is responsible for popping arguments
2199 if they are not variable. */
2200 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2201 /* Cdecl attribute says the callee is a normal C declaration */
2202 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2203 /* Regparm attribute specifies how many integer arguments are to be
2204 passed in registers. */
2205 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2206 /* Sseregparm attribute says we are using x86_64 calling conventions
2207 for FP arguments. */
2208 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2209 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2210 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2211 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2212 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2214 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2215 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2216 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2217 SUBTARGET_ATTRIBUTE_TABLE,
2219 { NULL, 0, 0, false, false, false, NULL }
2222 /* Decide whether we can make a sibling call to a function. DECL is the
2223 declaration of the function being targeted by the call and EXP is the
2224 CALL_EXPR representing the call. */
2227 ix86_function_ok_for_sibcall (tree decl, tree exp)
2232 /* If we are generating position-independent code, we cannot sibcall
2233 optimize any indirect call, or a direct call to a global function,
2234 as the PLT requires %ebx be live. */
2235 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2242 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2243 if (POINTER_TYPE_P (func))
2244 func = TREE_TYPE (func);
2247 /* Check that the return value locations are the same. Like
2248 if we are returning floats on the 80387 register stack, we cannot
2249 make a sibcall from a function that doesn't return a float to a
2250 function that does or, conversely, from a function that does return
2251 a float to a function that doesn't; the necessary stack adjustment
2252 would not be executed. This is also the place we notice
2253 differences in the return value ABI. Note that it is ok for one
2254 of the functions to have void return type as long as the return
2255 value of the other is passed in a register. */
2256 a = ix86_function_value (TREE_TYPE (exp), func, false);
2257 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2259 if (STACK_REG_P (a) || STACK_REG_P (b))
2261 if (!rtx_equal_p (a, b))
2264 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2266 else if (!rtx_equal_p (a, b))
2269 /* If this call is indirect, we'll need to be able to use a call-clobbered
2270 register for the address of the target function. Make sure that all
2271 such registers are not used for passing parameters. */
2272 if (!decl && !TARGET_64BIT)
2276 /* We're looking at the CALL_EXPR, we need the type of the function. */
2277 type = TREE_OPERAND (exp, 0); /* pointer expression */
2278 type = TREE_TYPE (type); /* pointer type */
2279 type = TREE_TYPE (type); /* function type */
2281 if (ix86_function_regparm (type, NULL) >= 3)
2283 /* ??? Need to count the actual number of registers to be used,
2284 not the possible number of registers. Fix later. */
2289 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2290 /* Dllimport'd functions are also called indirectly. */
2291 if (decl && DECL_DLLIMPORT_P (decl)
2292 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2296 /* If we forced aligned the stack, then sibcalling would unalign the
2297 stack, which may break the called function. */
2298 if (cfun->machine->force_align_arg_pointer)
2301 /* Otherwise okay. That also includes certain types of indirect calls. */
2305 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2306 calling convention attributes;
2307 arguments as in struct attribute_spec.handler. */
2310 ix86_handle_cconv_attribute (tree *node, tree name,
2312 int flags ATTRIBUTE_UNUSED,
2315 if (TREE_CODE (*node) != FUNCTION_TYPE
2316 && TREE_CODE (*node) != METHOD_TYPE
2317 && TREE_CODE (*node) != FIELD_DECL
2318 && TREE_CODE (*node) != TYPE_DECL)
2320 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2321 IDENTIFIER_POINTER (name));
2322 *no_add_attrs = true;
2326 /* Can combine regparm with all attributes but fastcall. */
2327 if (is_attribute_p ("regparm", name))
2331 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2333 error ("fastcall and regparm attributes are not compatible");
2336 cst = TREE_VALUE (args);
2337 if (TREE_CODE (cst) != INTEGER_CST)
2339 warning (OPT_Wattributes,
2340 "%qs attribute requires an integer constant argument",
2341 IDENTIFIER_POINTER (name));
2342 *no_add_attrs = true;
2344 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2346 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2347 IDENTIFIER_POINTER (name), REGPARM_MAX);
2348 *no_add_attrs = true;
2356 warning (OPT_Wattributes, "%qs attribute ignored",
2357 IDENTIFIER_POINTER (name));
2358 *no_add_attrs = true;
2362 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2363 if (is_attribute_p ("fastcall", name))
2365 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2367 error ("fastcall and cdecl attributes are not compatible");
2369 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2371 error ("fastcall and stdcall attributes are not compatible");
2373 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2375 error ("fastcall and regparm attributes are not compatible");
2379 /* Can combine stdcall with fastcall (redundant), regparm and
2381 else if (is_attribute_p ("stdcall", name))
2383 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2385 error ("stdcall and cdecl attributes are not compatible");
2387 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2389 error ("stdcall and fastcall attributes are not compatible");
2393 /* Can combine cdecl with regparm and sseregparm. */
2394 else if (is_attribute_p ("cdecl", name))
2396 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2398 error ("stdcall and cdecl attributes are not compatible");
2400 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2402 error ("fastcall and cdecl attributes are not compatible");
2406 /* Can combine sseregparm with all attributes. */
2411 /* Return 0 if the attributes for two types are incompatible, 1 if they
2412 are compatible, and 2 if they are nearly compatible (which causes a
2413 warning to be generated). */
2416 ix86_comp_type_attributes (tree type1, tree type2)
2418 /* Check for mismatch of non-default calling convention. */
2419 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2421 if (TREE_CODE (type1) != FUNCTION_TYPE)
2424 /* Check for mismatched fastcall/regparm types. */
2425 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2426 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2427 || (ix86_function_regparm (type1, NULL)
2428 != ix86_function_regparm (type2, NULL)))
2431 /* Check for mismatched sseregparm types. */
2432 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2433 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2436 /* Check for mismatched return types (cdecl vs stdcall). */
2437 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2438 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2444 /* Return the regparm value for a function with the indicated TYPE and DECL.
2445 DECL may be NULL when calling function indirectly
2446 or considering a libcall. */
2449 ix86_function_regparm (tree type, tree decl)
2452 int regparm = ix86_regparm;
2453 bool user_convention = false;
2457 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2460 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2461 user_convention = true;
2464 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2467 user_convention = true;
2470 /* Use register calling convention for local functions when possible. */
2471 if (!TARGET_64BIT && !user_convention && decl
2472 && flag_unit_at_a_time && !profile_flag)
2474 struct cgraph_local_info *i = cgraph_local_info (decl);
2477 int local_regparm, globals = 0, regno;
2479 /* Make sure no regparm register is taken by a global register
2481 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2482 if (global_regs[local_regparm])
2484 /* We can't use regparm(3) for nested functions as these use
2485 static chain pointer in third argument. */
2486 if (local_regparm == 3
2487 && decl_function_context (decl)
2488 && !DECL_NO_STATIC_CHAIN (decl))
2490 /* Each global register variable increases register preassure,
2491 so the more global reg vars there are, the smaller regparm
2492 optimization use, unless requested by the user explicitly. */
2493 for (regno = 0; regno < 6; regno++)
2494 if (global_regs[regno])
2497 = globals < local_regparm ? local_regparm - globals : 0;
2499 if (local_regparm > regparm)
2500 regparm = local_regparm;
2507 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2508 in SSE registers for a function with the indicated TYPE and DECL.
2509 DECL may be NULL when calling function indirectly
2510 or considering a libcall. Otherwise return 0. */
2513 ix86_function_sseregparm (tree type, tree decl)
2515 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2516 by the sseregparm attribute. */
2517 if (TARGET_SSEREGPARM
2519 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2524 error ("Calling %qD with attribute sseregparm without "
2525 "SSE/SSE2 enabled", decl);
2527 error ("Calling %qT with attribute sseregparm without "
2528 "SSE/SSE2 enabled", type);
2535 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2536 in SSE registers even for 32-bit mode and not just 3, but up to
2537 8 SSE arguments in registers. */
2538 if (!TARGET_64BIT && decl
2539 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2541 struct cgraph_local_info *i = cgraph_local_info (decl);
2543 return TARGET_SSE2 ? 2 : 1;
2549 /* Return true if EAX is live at the start of the function. Used by
2550 ix86_expand_prologue to determine if we need special help before
2551 calling allocate_stack_worker. */
2554 ix86_eax_live_at_start_p (void)
2556 /* Cheat. Don't bother working forward from ix86_function_regparm
2557 to the function type to whether an actual argument is located in
2558 eax. Instead just look at cfg info, which is still close enough
2559 to correct at this point. This gives false positives for broken
2560 functions that might use uninitialized data that happens to be
2561 allocated in eax, but who cares? */
2562 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2565 /* Value is the number of bytes of arguments automatically
2566 popped when returning from a subroutine call.
2567 FUNDECL is the declaration node of the function (as a tree),
2568 FUNTYPE is the data type of the function (as a tree),
2569 or for a library call it is an identifier node for the subroutine name.
2570 SIZE is the number of bytes of arguments passed on the stack.
2572 On the 80386, the RTD insn may be used to pop them if the number
2573 of args is fixed, but if the number is variable then the caller
2574 must pop them all. RTD can't be used for library calls now
2575 because the library is compiled with the Unix compiler.
2576 Use of RTD is a selectable option, since it is incompatible with
2577 standard Unix calling sequences. If the option is not selected,
2578 the caller must always pop the args.
2580 The attribute stdcall is equivalent to RTD on a per module basis. */
2583 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2585 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2587 /* Cdecl functions override -mrtd, and never pop the stack. */
2588 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2590 /* Stdcall and fastcall functions will pop the stack if not
2592 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2593 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2597 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2598 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2599 == void_type_node)))
2603 /* Lose any fake structure return argument if it is passed on the stack. */
2604 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2606 && !KEEP_AGGREGATE_RETURN_POINTER)
2608 int nregs = ix86_function_regparm (funtype, fundecl);
2611 return GET_MODE_SIZE (Pmode);
2617 /* Argument support functions. */
2619 /* Return true when register may be used to pass function parameters. */
2621 ix86_function_arg_regno_p (int regno)
2625 return (regno < REGPARM_MAX
2626 || (TARGET_MMX && MMX_REGNO_P (regno)
2627 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2628 || (TARGET_SSE && SSE_REGNO_P (regno)
2629 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2631 if (TARGET_SSE && SSE_REGNO_P (regno)
2632 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2634 /* RAX is used as hidden argument to va_arg functions. */
2637 for (i = 0; i < REGPARM_MAX; i++)
2638 if (regno == x86_64_int_parameter_registers[i])
2643 /* Return if we do not know how to pass TYPE solely in registers. */
2646 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2648 if (must_pass_in_stack_var_size_or_pad (mode, type))
2651 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2652 The layout_type routine is crafty and tries to trick us into passing
2653 currently unsupported vector types on the stack by using TImode. */
2654 return (!TARGET_64BIT && mode == TImode
2655 && type && TREE_CODE (type) != VECTOR_TYPE);
2658 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2659 for a call to a function whose data type is FNTYPE.
2660 For a library call, FNTYPE is 0. */
2663 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2664 tree fntype, /* tree ptr for function decl */
2665 rtx libname, /* SYMBOL_REF of library name or 0 */
2668 static CUMULATIVE_ARGS zero_cum;
2669 tree param, next_param;
2671 if (TARGET_DEBUG_ARG)
2673 fprintf (stderr, "\ninit_cumulative_args (");
2675 fprintf (stderr, "fntype code = %s, ret code = %s",
2676 tree_code_name[(int) TREE_CODE (fntype)],
2677 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2679 fprintf (stderr, "no fntype");
2682 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2687 /* Set up the number of registers to use for passing arguments. */
2688 cum->nregs = ix86_regparm;
2690 cum->sse_nregs = SSE_REGPARM_MAX;
2692 cum->mmx_nregs = MMX_REGPARM_MAX;
2693 cum->warn_sse = true;
2694 cum->warn_mmx = true;
2695 cum->maybe_vaarg = false;
2697 /* Use ecx and edx registers if function has fastcall attribute,
2698 else look for regparm information. */
2699 if (fntype && !TARGET_64BIT)
2701 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2707 cum->nregs = ix86_function_regparm (fntype, fndecl);
2710 /* Set up the number of SSE registers used for passing SFmode
2711 and DFmode arguments. Warn for mismatching ABI. */
2712 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2714 /* Determine if this function has variable arguments. This is
2715 indicated by the last argument being 'void_type_mode' if there
2716 are no variable arguments. If there are variable arguments, then
2717 we won't pass anything in registers in 32-bit mode. */
2719 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2721 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2722 param != 0; param = next_param)
2724 next_param = TREE_CHAIN (param);
2725 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2735 cum->float_in_sse = 0;
2737 cum->maybe_vaarg = true;
2741 if ((!fntype && !libname)
2742 || (fntype && !TYPE_ARG_TYPES (fntype)))
2743 cum->maybe_vaarg = true;
2745 if (TARGET_DEBUG_ARG)
2746 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2751 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2752 But in the case of vector types, it is some vector mode.
2754 When we have only some of our vector isa extensions enabled, then there
2755 are some modes for which vector_mode_supported_p is false. For these
2756 modes, the generic vector support in gcc will choose some non-vector mode
2757 in order to implement the type. By computing the natural mode, we'll
2758 select the proper ABI location for the operand and not depend on whatever
2759 the middle-end decides to do with these vector types. */
2761 static enum machine_mode
2762 type_natural_mode (tree type)
2764 enum machine_mode mode = TYPE_MODE (type);
2766 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2768 HOST_WIDE_INT size = int_size_in_bytes (type);
2769 if ((size == 8 || size == 16)
2770 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2771 && TYPE_VECTOR_SUBPARTS (type) > 1)
2773 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2775 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2776 mode = MIN_MODE_VECTOR_FLOAT;
2778 mode = MIN_MODE_VECTOR_INT;
2780 /* Get the mode which has this inner mode and number of units. */
2781 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2782 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2783 && GET_MODE_INNER (mode) == innermode)
2793 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2794 this may not agree with the mode that the type system has chosen for the
2795 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2796 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2799 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2804 if (orig_mode != BLKmode)
2805 tmp = gen_rtx_REG (orig_mode, regno);
2808 tmp = gen_rtx_REG (mode, regno);
2809 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2810 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2816 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2817 of this code is to classify each 8bytes of incoming argument by the register
2818 class and assign registers accordingly. */
2820 /* Return the union class of CLASS1 and CLASS2.
2821 See the x86-64 PS ABI for details. */
2823 static enum x86_64_reg_class
2824 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2826 /* Rule #1: If both classes are equal, this is the resulting class. */
2827 if (class1 == class2)
2830 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2832 if (class1 == X86_64_NO_CLASS)
2834 if (class2 == X86_64_NO_CLASS)
2837 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2838 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2839 return X86_64_MEMORY_CLASS;
2841 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2842 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2843 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2844 return X86_64_INTEGERSI_CLASS;
2845 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2846 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2847 return X86_64_INTEGER_CLASS;
2849 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2851 if (class1 == X86_64_X87_CLASS
2852 || class1 == X86_64_X87UP_CLASS
2853 || class1 == X86_64_COMPLEX_X87_CLASS
2854 || class2 == X86_64_X87_CLASS
2855 || class2 == X86_64_X87UP_CLASS
2856 || class2 == X86_64_COMPLEX_X87_CLASS)
2857 return X86_64_MEMORY_CLASS;
2859 /* Rule #6: Otherwise class SSE is used. */
2860 return X86_64_SSE_CLASS;
2863 /* Classify the argument of type TYPE and mode MODE.
2864 CLASSES will be filled by the register class used to pass each word
2865 of the operand. The number of words is returned. In case the parameter
2866 should be passed in memory, 0 is returned. As a special case for zero
2867 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2869 BIT_OFFSET is used internally for handling records and specifies offset
2870 of the offset in bits modulo 256 to avoid overflow cases.
2872 See the x86-64 PS ABI for details.
2876 classify_argument (enum machine_mode mode, tree type,
2877 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2879 HOST_WIDE_INT bytes =
2880 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2881 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2883 /* Variable sized entities are always passed/returned in memory. */
2887 if (mode != VOIDmode
2888 && targetm.calls.must_pass_in_stack (mode, type))
2891 if (type && AGGREGATE_TYPE_P (type))
2895 enum x86_64_reg_class subclasses[MAX_CLASSES];
2897 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2901 for (i = 0; i < words; i++)
2902 classes[i] = X86_64_NO_CLASS;
2904 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2905 signalize memory class, so handle it as special case. */
2908 classes[0] = X86_64_NO_CLASS;
2912 /* Classify each field of record and merge classes. */
2913 switch (TREE_CODE (type))
2916 /* For classes first merge in the field of the subclasses. */
2917 if (TYPE_BINFO (type))
2919 tree binfo, base_binfo;
2922 for (binfo = TYPE_BINFO (type), basenum = 0;
2923 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2926 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2927 tree type = BINFO_TYPE (base_binfo);
2929 num = classify_argument (TYPE_MODE (type),
2931 (offset + bit_offset) % 256);
2934 for (i = 0; i < num; i++)
2936 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2938 merge_classes (subclasses[i], classes[i + pos]);
2942 /* And now merge the fields of structure. */
2943 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2945 if (TREE_CODE (field) == FIELD_DECL)
2949 /* Bitfields are always classified as integer. Handle them
2950 early, since later code would consider them to be
2951 misaligned integers. */
2952 if (DECL_BIT_FIELD (field))
2954 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2955 i < ((int_bit_position (field) + (bit_offset % 64))
2956 + tree_low_cst (DECL_SIZE (field), 0)
2959 merge_classes (X86_64_INTEGER_CLASS,
2964 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2965 TREE_TYPE (field), subclasses,
2966 (int_bit_position (field)
2967 + bit_offset) % 256);
2970 for (i = 0; i < num; i++)
2973 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2975 merge_classes (subclasses[i], classes[i + pos]);
2983 /* Arrays are handled as small records. */
2986 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2987 TREE_TYPE (type), subclasses, bit_offset);
2991 /* The partial classes are now full classes. */
2992 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2993 subclasses[0] = X86_64_SSE_CLASS;
2994 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2995 subclasses[0] = X86_64_INTEGER_CLASS;
2997 for (i = 0; i < words; i++)
2998 classes[i] = subclasses[i % num];
3003 case QUAL_UNION_TYPE:
3004 /* Unions are similar to RECORD_TYPE but offset is always 0.
3007 /* Unions are not derived. */
3008 gcc_assert (!TYPE_BINFO (type)
3009 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3010 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3012 if (TREE_CODE (field) == FIELD_DECL)
3015 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3016 TREE_TYPE (field), subclasses,
3020 for (i = 0; i < num; i++)
3021 classes[i] = merge_classes (subclasses[i], classes[i]);
3030 /* Final merger cleanup. */
3031 for (i = 0; i < words; i++)
3033 /* If one class is MEMORY, everything should be passed in
3035 if (classes[i] == X86_64_MEMORY_CLASS)
3038 /* The X86_64_SSEUP_CLASS should be always preceded by
3039 X86_64_SSE_CLASS. */
3040 if (classes[i] == X86_64_SSEUP_CLASS
3041 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3042 classes[i] = X86_64_SSE_CLASS;
3044 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3045 if (classes[i] == X86_64_X87UP_CLASS
3046 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3047 classes[i] = X86_64_SSE_CLASS;
3052 /* Compute alignment needed. We align all types to natural boundaries with
3053 exception of XFmode that is aligned to 64bits. */
3054 if (mode != VOIDmode && mode != BLKmode)
3056 int mode_alignment = GET_MODE_BITSIZE (mode);
3059 mode_alignment = 128;
3060 else if (mode == XCmode)
3061 mode_alignment = 256;
3062 if (COMPLEX_MODE_P (mode))
3063 mode_alignment /= 2;
3064 /* Misaligned fields are always returned in memory. */
3065 if (bit_offset % mode_alignment)
3069 /* for V1xx modes, just use the base mode */
3070 if (VECTOR_MODE_P (mode)
3071 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3072 mode = GET_MODE_INNER (mode);
3074 /* Classification of atomic types. */
3079 classes[0] = X86_64_SSE_CLASS;
3082 classes[0] = X86_64_SSE_CLASS;
3083 classes[1] = X86_64_SSEUP_CLASS;
3092 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3093 classes[0] = X86_64_INTEGERSI_CLASS;
3095 classes[0] = X86_64_INTEGER_CLASS;
3099 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3104 if (!(bit_offset % 64))
3105 classes[0] = X86_64_SSESF_CLASS;
3107 classes[0] = X86_64_SSE_CLASS;
3110 classes[0] = X86_64_SSEDF_CLASS;
3113 classes[0] = X86_64_X87_CLASS;
3114 classes[1] = X86_64_X87UP_CLASS;
3117 classes[0] = X86_64_SSE_CLASS;
3118 classes[1] = X86_64_SSEUP_CLASS;
3121 classes[0] = X86_64_SSE_CLASS;
3124 classes[0] = X86_64_SSEDF_CLASS;
3125 classes[1] = X86_64_SSEDF_CLASS;
3128 classes[0] = X86_64_COMPLEX_X87_CLASS;
3131 /* This modes is larger than 16 bytes. */
3139 classes[0] = X86_64_SSE_CLASS;
3140 classes[1] = X86_64_SSEUP_CLASS;
3146 classes[0] = X86_64_SSE_CLASS;
3152 gcc_assert (VECTOR_MODE_P (mode));
3157 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3159 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3160 classes[0] = X86_64_INTEGERSI_CLASS;
3162 classes[0] = X86_64_INTEGER_CLASS;
3163 classes[1] = X86_64_INTEGER_CLASS;
3164 return 1 + (bytes > 8);
3168 /* Examine the argument and return set number of register required in each
3169 class. Return 0 iff parameter should be passed in memory. */
3171 examine_argument (enum machine_mode mode, tree type, int in_return,
3172 int *int_nregs, int *sse_nregs)
3174 enum x86_64_reg_class class[MAX_CLASSES];
3175 int n = classify_argument (mode, type, class, 0);
3181 for (n--; n >= 0; n--)
3184 case X86_64_INTEGER_CLASS:
3185 case X86_64_INTEGERSI_CLASS:
3188 case X86_64_SSE_CLASS:
3189 case X86_64_SSESF_CLASS:
3190 case X86_64_SSEDF_CLASS:
3193 case X86_64_NO_CLASS:
3194 case X86_64_SSEUP_CLASS:
3196 case X86_64_X87_CLASS:
3197 case X86_64_X87UP_CLASS:
3201 case X86_64_COMPLEX_X87_CLASS:
3202 return in_return ? 2 : 0;
3203 case X86_64_MEMORY_CLASS:
3209 /* Construct container for the argument used by GCC interface. See
3210 FUNCTION_ARG for the detailed description. */
3213 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3214 tree type, int in_return, int nintregs, int nsseregs,
3215 const int *intreg, int sse_regno)
3217 enum machine_mode tmpmode;
3219 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3220 enum x86_64_reg_class class[MAX_CLASSES];
3224 int needed_sseregs, needed_intregs;
3225 rtx exp[MAX_CLASSES];
3228 n = classify_argument (mode, type, class, 0);
3229 if (TARGET_DEBUG_ARG)
3232 fprintf (stderr, "Memory class\n");
3235 fprintf (stderr, "Classes:");
3236 for (i = 0; i < n; i++)
3238 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3240 fprintf (stderr, "\n");
3245 if (!examine_argument (mode, type, in_return, &needed_intregs,
3248 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3251 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3252 some less clueful developer tries to use floating-point anyway. */
3253 if (needed_sseregs && !TARGET_SSE)
3255 static bool issued_error;
3258 issued_error = true;
3260 error ("SSE register return with SSE disabled");
3262 error ("SSE register argument with SSE disabled");
3267 /* First construct simple cases. Avoid SCmode, since we want to use
3268 single register to pass this type. */
3269 if (n == 1 && mode != SCmode)
3272 case X86_64_INTEGER_CLASS:
3273 case X86_64_INTEGERSI_CLASS:
3274 return gen_rtx_REG (mode, intreg[0]);
3275 case X86_64_SSE_CLASS:
3276 case X86_64_SSESF_CLASS:
3277 case X86_64_SSEDF_CLASS:
3278 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3279 case X86_64_X87_CLASS:
3280 case X86_64_COMPLEX_X87_CLASS:
3281 return gen_rtx_REG (mode, FIRST_STACK_REG);
3282 case X86_64_NO_CLASS:
3283 /* Zero sized array, struct or class. */
3288 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3290 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3292 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3293 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3294 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3295 && class[1] == X86_64_INTEGER_CLASS
3296 && (mode == CDImode || mode == TImode || mode == TFmode)
3297 && intreg[0] + 1 == intreg[1])
3298 return gen_rtx_REG (mode, intreg[0]);
3300 /* Otherwise figure out the entries of the PARALLEL. */
3301 for (i = 0; i < n; i++)
3305 case X86_64_NO_CLASS:
3307 case X86_64_INTEGER_CLASS:
3308 case X86_64_INTEGERSI_CLASS:
3309 /* Merge TImodes on aligned occasions here too. */
3310 if (i * 8 + 8 > bytes)
3311 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3312 else if (class[i] == X86_64_INTEGERSI_CLASS)
3316 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3317 if (tmpmode == BLKmode)
3319 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3320 gen_rtx_REG (tmpmode, *intreg),
3324 case X86_64_SSESF_CLASS:
3325 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3326 gen_rtx_REG (SFmode,
3327 SSE_REGNO (sse_regno)),
3331 case X86_64_SSEDF_CLASS:
3332 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3333 gen_rtx_REG (DFmode,
3334 SSE_REGNO (sse_regno)),
3338 case X86_64_SSE_CLASS:
3339 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3343 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3344 gen_rtx_REG (tmpmode,
3345 SSE_REGNO (sse_regno)),
3347 if (tmpmode == TImode)
3356 /* Empty aligned struct, union or class. */
3360 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3361 for (i = 0; i < nexps; i++)
3362 XVECEXP (ret, 0, i) = exp [i];
3366 /* Update the data in CUM to advance over an argument
3367 of mode MODE and data type TYPE.
3368 (TYPE is null for libcalls where that information may not be available.) */
3371 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3372 tree type, int named)
3375 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3376 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3379 mode = type_natural_mode (type);
3381 if (TARGET_DEBUG_ARG)
3382 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3383 "mode=%s, named=%d)\n\n",
3384 words, cum->words, cum->nregs, cum->sse_nregs,
3385 GET_MODE_NAME (mode), named);
3389 int int_nregs, sse_nregs;
3390 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3391 cum->words += words;
3392 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3394 cum->nregs -= int_nregs;
3395 cum->sse_nregs -= sse_nregs;
3396 cum->regno += int_nregs;
3397 cum->sse_regno += sse_nregs;
3400 cum->words += words;
3418 cum->words += words;
3419 cum->nregs -= words;
3420 cum->regno += words;
3422 if (cum->nregs <= 0)
3430 if (cum->float_in_sse < 2)
3433 if (cum->float_in_sse < 1)
3444 if (!type || !AGGREGATE_TYPE_P (type))
3446 cum->sse_words += words;
3447 cum->sse_nregs -= 1;
3448 cum->sse_regno += 1;
3449 if (cum->sse_nregs <= 0)
3461 if (!type || !AGGREGATE_TYPE_P (type))
3463 cum->mmx_words += words;
3464 cum->mmx_nregs -= 1;
3465 cum->mmx_regno += 1;
3466 if (cum->mmx_nregs <= 0)
3477 /* Define where to put the arguments to a function.
3478 Value is zero to push the argument on the stack,
3479 or a hard register in which to store the argument.
3481 MODE is the argument's machine mode.
3482 TYPE is the data type of the argument (as a tree).
3483 This is null for libcalls where that information may
3485 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3486 the preceding args and about the function being called.
3487 NAMED is nonzero if this argument is a named parameter
3488 (otherwise it is an extra parameter matching an ellipsis). */
3491 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3492 tree type, int named)
3494 enum machine_mode mode = orig_mode;
3497 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3498 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3499 static bool warnedsse, warnedmmx;
3501 /* To simplify the code below, represent vector types with a vector mode
3502 even if MMX/SSE are not active. */
3503 if (type && TREE_CODE (type) == VECTOR_TYPE)
3504 mode = type_natural_mode (type);
3506 /* Handle a hidden AL argument containing number of registers for varargs
3507 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3509 if (mode == VOIDmode)
3512 return GEN_INT (cum->maybe_vaarg
3513 ? (cum->sse_nregs < 0
3521 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3523 &x86_64_int_parameter_registers [cum->regno],
3528 /* For now, pass fp/complex values on the stack. */
3540 if (words <= cum->nregs)
3542 int regno = cum->regno;
3544 /* Fastcall allocates the first two DWORD (SImode) or
3545 smaller arguments to ECX and EDX. */
3548 if (mode == BLKmode || mode == DImode)
3551 /* ECX not EAX is the first allocated register. */
3555 ret = gen_rtx_REG (mode, regno);
3559 if (cum->float_in_sse < 2)
3562 if (cum->float_in_sse < 1)
3572 if (!type || !AGGREGATE_TYPE_P (type))
3574 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3577 warning (0, "SSE vector argument without SSE enabled "
3581 ret = gen_reg_or_parallel (mode, orig_mode,
3582 cum->sse_regno + FIRST_SSE_REG);
3589 if (!type || !AGGREGATE_TYPE_P (type))
3591 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3594 warning (0, "MMX vector argument without MMX enabled "
3598 ret = gen_reg_or_parallel (mode, orig_mode,
3599 cum->mmx_regno + FIRST_MMX_REG);
3604 if (TARGET_DEBUG_ARG)
3607 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3608 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3611 print_simple_rtl (stderr, ret);
3613 fprintf (stderr, ", stack");
3615 fprintf (stderr, " )\n");
3621 /* A C expression that indicates when an argument must be passed by
3622 reference. If nonzero for an argument, a copy of that argument is
3623 made in memory and a pointer to the argument is passed instead of
3624 the argument itself. The pointer is passed in whatever way is
3625 appropriate for passing a pointer to that type. */
3628 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3629 enum machine_mode mode ATTRIBUTE_UNUSED,
3630 tree type, bool named ATTRIBUTE_UNUSED)
3635 if (type && int_size_in_bytes (type) == -1)
3637 if (TARGET_DEBUG_ARG)
3638 fprintf (stderr, "function_arg_pass_by_reference\n");
3645 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3646 ABI. Only called if TARGET_SSE. */
3648 contains_128bit_aligned_vector_p (tree type)
3650 enum machine_mode mode = TYPE_MODE (type);
3651 if (SSE_REG_MODE_P (mode)
3652 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3654 if (TYPE_ALIGN (type) < 128)
3657 if (AGGREGATE_TYPE_P (type))
3659 /* Walk the aggregates recursively. */
3660 switch (TREE_CODE (type))
3664 case QUAL_UNION_TYPE:
3668 if (TYPE_BINFO (type))
3670 tree binfo, base_binfo;
3673 for (binfo = TYPE_BINFO (type), i = 0;
3674 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3675 if (contains_128bit_aligned_vector_p
3676 (BINFO_TYPE (base_binfo)))
3679 /* And now merge the fields of structure. */
3680 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3682 if (TREE_CODE (field) == FIELD_DECL
3683 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3690 /* Just for use if some languages passes arrays by value. */
3691 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3702 /* Gives the alignment boundary, in bits, of an argument with the
3703 specified mode and type. */
3706 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3710 align = TYPE_ALIGN (type);
3712 align = GET_MODE_ALIGNMENT (mode);
3713 if (align < PARM_BOUNDARY)
3714 align = PARM_BOUNDARY;
3717 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3718 make an exception for SSE modes since these require 128bit
3721 The handling here differs from field_alignment. ICC aligns MMX
3722 arguments to 4 byte boundaries, while structure fields are aligned
3723 to 8 byte boundaries. */
3725 align = PARM_BOUNDARY;
3728 if (!SSE_REG_MODE_P (mode))
3729 align = PARM_BOUNDARY;
3733 if (!contains_128bit_aligned_vector_p (type))
3734 align = PARM_BOUNDARY;
3742 /* Return true if N is a possible register number of function value. */
3744 ix86_function_value_regno_p (int regno)
3747 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3748 || (regno == FIRST_SSE_REG && TARGET_SSE))
3752 && (regno == FIRST_MMX_REG && TARGET_MMX))
3758 /* Define how to find the value returned by a function.
3759 VALTYPE is the data type of the value (as a tree).
3760 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3761 otherwise, FUNC is 0. */
3763 ix86_function_value (tree valtype, tree fntype_or_decl,
3764 bool outgoing ATTRIBUTE_UNUSED)
3766 enum machine_mode natmode = type_natural_mode (valtype);
3770 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3771 1, REGPARM_MAX, SSE_REGPARM_MAX,
3772 x86_64_int_return_registers, 0);
3773 /* For zero sized structures, construct_container return NULL, but we
3774 need to keep rest of compiler happy by returning meaningful value. */
3776 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3781 tree fn = NULL_TREE, fntype;
3783 && DECL_P (fntype_or_decl))
3784 fn = fntype_or_decl;
3785 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3786 return gen_rtx_REG (TYPE_MODE (valtype),
3787 ix86_value_regno (natmode, fn, fntype));
3791 /* Return true iff type is returned in memory. */
3793 ix86_return_in_memory (tree type)
3795 int needed_intregs, needed_sseregs, size;
3796 enum machine_mode mode = type_natural_mode (type);
3799 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3801 if (mode == BLKmode)
3804 size = int_size_in_bytes (type);
3806 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3809 if (VECTOR_MODE_P (mode) || mode == TImode)
3811 /* User-created vectors small enough to fit in EAX. */
3815 /* MMX/3dNow values are returned in MM0,
3816 except when it doesn't exits. */
3818 return (TARGET_MMX ? 0 : 1);
3820 /* SSE values are returned in XMM0, except when it doesn't exist. */
3822 return (TARGET_SSE ? 0 : 1);
3836 /* When returning SSE vector types, we have a choice of either
3837 (1) being abi incompatible with a -march switch, or
3838 (2) generating an error.
3839 Given no good solution, I think the safest thing is one warning.
3840 The user won't be able to use -Werror, but....
3842 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3843 called in response to actually generating a caller or callee that
3844 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3845 via aggregate_value_p for general type probing from tree-ssa. */
3848 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3850 static bool warnedsse, warnedmmx;
3854 /* Look at the return type of the function, not the function type. */
3855 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3857 if (!TARGET_SSE && !warnedsse)
3860 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3863 warning (0, "SSE vector return without SSE enabled "
3868 if (!TARGET_MMX && !warnedmmx)
3870 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3873 warning (0, "MMX vector return without MMX enabled "
3882 /* Define how to find the value returned by a library function
3883 assuming the value has mode MODE. */
3885 ix86_libcall_value (enum machine_mode mode)
3899 return gen_rtx_REG (mode, FIRST_SSE_REG);
3902 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3906 return gen_rtx_REG (mode, 0);
3910 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3913 /* Given a mode, return the register to use for a return value. */
3916 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3918 gcc_assert (!TARGET_64BIT);
3920 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3921 we prevent this case when mmx is not available. */
3922 if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
3923 return FIRST_MMX_REG;
3925 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3926 we prevent this case when sse is not available. */
3927 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3928 return FIRST_SSE_REG;
3930 /* Decimal floating point values can go in %eax, unlike other float modes. */
3931 if (DECIMAL_FLOAT_MODE_P (mode))
3934 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3935 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
3938 /* Floating point return values in %st(0), except for local functions when
3939 SSE math is enabled or for functions with sseregparm attribute. */
3940 if ((func || fntype)
3941 && (mode == SFmode || mode == DFmode))
3943 int sse_level = ix86_function_sseregparm (fntype, func);
3944 if ((sse_level >= 1 && mode == SFmode)
3945 || (sse_level == 2 && mode == DFmode))
3946 return FIRST_SSE_REG;
3949 return FIRST_FLOAT_REG;
3952 /* Create the va_list data type. */
3955 ix86_build_builtin_va_list (void)
3957 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3959 /* For i386 we use plain pointer to argument area. */
3961 return build_pointer_type (char_type_node);
3963 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3964 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3966 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3967 unsigned_type_node);
3968 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3969 unsigned_type_node);
3970 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3972 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3975 va_list_gpr_counter_field = f_gpr;
3976 va_list_fpr_counter_field = f_fpr;
3978 DECL_FIELD_CONTEXT (f_gpr) = record;
3979 DECL_FIELD_CONTEXT (f_fpr) = record;
3980 DECL_FIELD_CONTEXT (f_ovf) = record;
3981 DECL_FIELD_CONTEXT (f_sav) = record;
3983 TREE_CHAIN (record) = type_decl;
3984 TYPE_NAME (record) = type_decl;
3985 TYPE_FIELDS (record) = f_gpr;
3986 TREE_CHAIN (f_gpr) = f_fpr;
3987 TREE_CHAIN (f_fpr) = f_ovf;
3988 TREE_CHAIN (f_ovf) = f_sav;
3990 layout_type (record);
3992 /* The correct type is an array type of one element. */
3993 return build_array_type (record, build_index_type (size_zero_node));
3996 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3999 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4000 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4003 CUMULATIVE_ARGS next_cum;
4004 rtx save_area = NULL_RTX, mem;
4017 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4020 /* Indicate to allocate space on the stack for varargs save area. */
4021 ix86_save_varrargs_registers = 1;
4023 cfun->stack_alignment_needed = 128;
4025 fntype = TREE_TYPE (current_function_decl);
4026 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4027 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4028 != void_type_node));
4030 /* For varargs, we do not want to skip the dummy va_dcl argument.
4031 For stdargs, we do want to skip the last named argument. */
4034 function_arg_advance (&next_cum, mode, type, 1);
4037 save_area = frame_pointer_rtx;
4039 set = get_varargs_alias_set ();
4041 for (i = next_cum.regno;
4043 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4046 mem = gen_rtx_MEM (Pmode,
4047 plus_constant (save_area, i * UNITS_PER_WORD));
4048 MEM_NOTRAP_P (mem) = 1;
4049 set_mem_alias_set (mem, set);
4050 emit_move_insn (mem, gen_rtx_REG (Pmode,
4051 x86_64_int_parameter_registers[i]));
4054 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4056 /* Now emit code to save SSE registers. The AX parameter contains number
4057 of SSE parameter registers used to call this function. We use
4058 sse_prologue_save insn template that produces computed jump across
4059 SSE saves. We need some preparation work to get this working. */
4061 label = gen_label_rtx ();
4062 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4064 /* Compute address to jump to :
4065 label - 5*eax + nnamed_sse_arguments*5 */
4066 tmp_reg = gen_reg_rtx (Pmode);
4067 nsse_reg = gen_reg_rtx (Pmode);
4068 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4069 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4070 gen_rtx_MULT (Pmode, nsse_reg,
4072 if (next_cum.sse_regno)
4075 gen_rtx_CONST (DImode,
4076 gen_rtx_PLUS (DImode,
4078 GEN_INT (next_cum.sse_regno * 4))));
4080 emit_move_insn (nsse_reg, label_ref);
4081 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4083 /* Compute address of memory block we save into. We always use pointer
4084 pointing 127 bytes after first byte to store - this is needed to keep
4085 instruction size limited by 4 bytes. */
4086 tmp_reg = gen_reg_rtx (Pmode);
4087 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4088 plus_constant (save_area,
4089 8 * REGPARM_MAX + 127)));
4090 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4091 MEM_NOTRAP_P (mem) = 1;
4092 set_mem_alias_set (mem, set);
4093 set_mem_align (mem, BITS_PER_WORD);
4095 /* And finally do the dirty job! */
4096 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4097 GEN_INT (next_cum.sse_regno), label));
4102 /* Implement va_start. */
4105 ix86_va_start (tree valist, rtx nextarg)
4107 HOST_WIDE_INT words, n_gpr, n_fpr;
4108 tree f_gpr, f_fpr, f_ovf, f_sav;
4109 tree gpr, fpr, ovf, sav, t;
4111 /* Only 64bit target needs something special. */
4114 std_expand_builtin_va_start (valist, nextarg);
4118 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4119 f_fpr = TREE_CHAIN (f_gpr);
4120 f_ovf = TREE_CHAIN (f_fpr);
4121 f_sav = TREE_CHAIN (f_ovf);
4123 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4124 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4125 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4126 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4127 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4129 /* Count number of gp and fp argument registers used. */
4130 words = current_function_args_info.words;
4131 n_gpr = current_function_args_info.regno;
4132 n_fpr = current_function_args_info.sse_regno;
4134 if (TARGET_DEBUG_ARG)
4135 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4136 (int) words, (int) n_gpr, (int) n_fpr);
4138 if (cfun->va_list_gpr_size)
4140 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
4141 build_int_cst (NULL_TREE, n_gpr * 8));
4142 TREE_SIDE_EFFECTS (t) = 1;
4143 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4146 if (cfun->va_list_fpr_size)
4148 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
4149 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
4150 TREE_SIDE_EFFECTS (t) = 1;
4151 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4154 /* Find the overflow area. */
4155 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
4157 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), t,
4158 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
4159 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4160 TREE_SIDE_EFFECTS (t) = 1;
4161 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4163 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4165 /* Find the register save area.
4166 Prologue of the function save it right above stack frame. */
4167 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
4168 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
4169 TREE_SIDE_EFFECTS (t) = 1;
4170 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4174 /* Implement va_arg. */
4177 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4179 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4180 tree f_gpr, f_fpr, f_ovf, f_sav;
4181 tree gpr, fpr, ovf, sav, t;
4183 tree lab_false, lab_over = NULL_TREE;
4188 enum machine_mode nat_mode;
4190 /* Only 64bit target needs something special. */
4192 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4194 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4195 f_fpr = TREE_CHAIN (f_gpr);
4196 f_ovf = TREE_CHAIN (f_fpr);
4197 f_sav = TREE_CHAIN (f_ovf);
4199 valist = build_va_arg_indirect_ref (valist);
4200 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4201 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4202 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4203 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4205 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4207 type = build_pointer_type (type);
4208 size = int_size_in_bytes (type);
4209 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4211 nat_mode = type_natural_mode (type);
4212 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4213 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4215 /* Pull the value out of the saved registers. */
4217 addr = create_tmp_var (ptr_type_node, "addr");
4218 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4222 int needed_intregs, needed_sseregs;
4224 tree int_addr, sse_addr;
4226 lab_false = create_artificial_label ();
4227 lab_over = create_artificial_label ();
4229 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4231 need_temp = (!REG_P (container)
4232 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4233 || TYPE_ALIGN (type) > 128));
4235 /* In case we are passing structure, verify that it is consecutive block
4236 on the register save area. If not we need to do moves. */
4237 if (!need_temp && !REG_P (container))
4239 /* Verify that all registers are strictly consecutive */
4240 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4244 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4246 rtx slot = XVECEXP (container, 0, i);
4247 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4248 || INTVAL (XEXP (slot, 1)) != i * 16)
4256 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4258 rtx slot = XVECEXP (container, 0, i);
4259 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4260 || INTVAL (XEXP (slot, 1)) != i * 8)
4272 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4273 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4274 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4275 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4278 /* First ensure that we fit completely in registers. */
4281 t = build_int_cst (TREE_TYPE (gpr),
4282 (REGPARM_MAX - needed_intregs + 1) * 8);
4283 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4284 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4285 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4286 gimplify_and_add (t, pre_p);
4290 t = build_int_cst (TREE_TYPE (fpr),
4291 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4293 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4294 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4295 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4296 gimplify_and_add (t, pre_p);
4299 /* Compute index to start of area used for integer regs. */
4302 /* int_addr = gpr + sav; */
4303 t = fold_convert (ptr_type_node, gpr);
4304 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4305 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4306 gimplify_and_add (t, pre_p);
4310 /* sse_addr = fpr + sav; */
4311 t = fold_convert (ptr_type_node, fpr);
4312 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4313 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4314 gimplify_and_add (t, pre_p);
4319 tree temp = create_tmp_var (type, "va_arg_tmp");
4322 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4323 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4324 gimplify_and_add (t, pre_p);
4326 for (i = 0; i < XVECLEN (container, 0); i++)
4328 rtx slot = XVECEXP (container, 0, i);
4329 rtx reg = XEXP (slot, 0);
4330 enum machine_mode mode = GET_MODE (reg);
4331 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4332 tree addr_type = build_pointer_type (piece_type);
4335 tree dest_addr, dest;
4337 if (SSE_REGNO_P (REGNO (reg)))
4339 src_addr = sse_addr;
4340 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4344 src_addr = int_addr;
4345 src_offset = REGNO (reg) * 8;
4347 src_addr = fold_convert (addr_type, src_addr);
4348 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4349 size_int (src_offset)));
4350 src = build_va_arg_indirect_ref (src_addr);
4352 dest_addr = fold_convert (addr_type, addr);
4353 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4354 size_int (INTVAL (XEXP (slot, 1)))));
4355 dest = build_va_arg_indirect_ref (dest_addr);
4357 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4358 gimplify_and_add (t, pre_p);
4364 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4365 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4366 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4367 gimplify_and_add (t, pre_p);
4371 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4372 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4373 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4374 gimplify_and_add (t, pre_p);
4377 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4378 gimplify_and_add (t, pre_p);
4380 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4381 append_to_statement_list (t, pre_p);
4384 /* ... otherwise out of the overflow area. */
4386 /* Care for on-stack alignment if needed. */
4387 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4388 || integer_zerop (TYPE_SIZE (type)))
4392 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4393 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4394 build_int_cst (TREE_TYPE (ovf), align - 1));
4395 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4396 build_int_cst (TREE_TYPE (t), -align));
4398 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4400 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4401 gimplify_and_add (t2, pre_p);
4403 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4404 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4405 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4406 gimplify_and_add (t, pre_p);
4410 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4411 append_to_statement_list (t, pre_p);
4414 ptrtype = build_pointer_type (type);
4415 addr = fold_convert (ptrtype, addr);
4418 addr = build_va_arg_indirect_ref (addr);
4419 return build_va_arg_indirect_ref (addr);
4422 /* Return nonzero if OPNUM's MEM should be matched
4423 in movabs* patterns. */
4426 ix86_check_movabs (rtx insn, int opnum)
4430 set = PATTERN (insn);
4431 if (GET_CODE (set) == PARALLEL)
4432 set = XVECEXP (set, 0, 0);
4433 gcc_assert (GET_CODE (set) == SET);
4434 mem = XEXP (set, opnum);
4435 while (GET_CODE (mem) == SUBREG)
4436 mem = SUBREG_REG (mem);
4437 gcc_assert (GET_CODE (mem) == MEM);
4438 return (volatile_ok || !MEM_VOLATILE_P (mem));
4441 /* Initialize the table of extra 80387 mathematical constants. */
4444 init_ext_80387_constants (void)
4446 static const char * cst[5] =
4448 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4449 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4450 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4451 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4452 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4456 for (i = 0; i < 5; i++)
4458 real_from_string (&ext_80387_constants_table[i], cst[i]);
4459 /* Ensure each constant is rounded to XFmode precision. */
4460 real_convert (&ext_80387_constants_table[i],
4461 XFmode, &ext_80387_constants_table[i]);
4464 ext_80387_constants_init = 1;
4467 /* Return true if the constant is something that can be loaded with
4468 a special instruction. */
4471 standard_80387_constant_p (rtx x)
4473 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4476 if (x == CONST0_RTX (GET_MODE (x)))
4478 if (x == CONST1_RTX (GET_MODE (x)))
4481 /* For XFmode constants, try to find a special 80387 instruction when
4482 optimizing for size or on those CPUs that benefit from them. */
4483 if (GET_MODE (x) == XFmode
4484 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4489 if (! ext_80387_constants_init)
4490 init_ext_80387_constants ();
4492 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4493 for (i = 0; i < 5; i++)
4494 if (real_identical (&r, &ext_80387_constants_table[i]))
4501 /* Return the opcode of the special instruction to be used to load
4505 standard_80387_constant_opcode (rtx x)
4507 switch (standard_80387_constant_p (x))
4528 /* Return the CONST_DOUBLE representing the 80387 constant that is
4529 loaded by the specified special instruction. The argument IDX
4530 matches the return value from standard_80387_constant_p. */
4533 standard_80387_constant_rtx (int idx)
4537 if (! ext_80387_constants_init)
4538 init_ext_80387_constants ();
4554 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4558 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4561 standard_sse_constant_p (rtx x)
4563 if (x == const0_rtx)
4565 return (x == CONST0_RTX (GET_MODE (x)));
4568 /* Returns 1 if OP contains a symbol reference */
4571 symbolic_reference_mentioned_p (rtx op)
4576 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4579 fmt = GET_RTX_FORMAT (GET_CODE (op));
4580 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4586 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4587 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4591 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4598 /* Return 1 if it is appropriate to emit `ret' instructions in the
4599 body of a function. Do this only if the epilogue is simple, needing a
4600 couple of insns. Prior to reloading, we can't tell how many registers
4601 must be saved, so return 0 then. Return 0 if there is no frame
4602 marker to de-allocate. */
4605 ix86_can_use_return_insn_p (void)
4607 struct ix86_frame frame;
4609 if (! reload_completed || frame_pointer_needed)
4612 /* Don't allow more than 32 pop, since that's all we can do
4613 with one instruction. */
4614 if (current_function_pops_args
4615 && current_function_args_size >= 32768)
4618 ix86_compute_frame_layout (&frame);
4619 return frame.to_allocate == 0 && frame.nregs == 0;
4622 /* Value should be nonzero if functions must have frame pointers.
4623 Zero means the frame pointer need not be set up (and parms may
4624 be accessed via the stack pointer) in functions that seem suitable. */
4627 ix86_frame_pointer_required (void)
4629 /* If we accessed previous frames, then the generated code expects
4630 to be able to access the saved ebp value in our frame. */
4631 if (cfun->machine->accesses_prev_frame)
4634 /* Several x86 os'es need a frame pointer for other reasons,
4635 usually pertaining to setjmp. */
4636 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4639 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4640 the frame pointer by default. Turn it back on now if we've not
4641 got a leaf function. */
4642 if (TARGET_OMIT_LEAF_FRAME_POINTER
4643 && (!current_function_is_leaf
4644 || ix86_current_function_calls_tls_descriptor))
4647 if (current_function_profile)
4653 /* Record that the current function accesses previous call frames. */
4656 ix86_setup_frame_addresses (void)
4658 cfun->machine->accesses_prev_frame = 1;
4661 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4662 # define USE_HIDDEN_LINKONCE 1
4664 # define USE_HIDDEN_LINKONCE 0
4667 static int pic_labels_used;
4669 /* Fills in the label name that should be used for a pc thunk for
4670 the given register. */
4673 get_pc_thunk_name (char name[32], unsigned int regno)
4675 if (USE_HIDDEN_LINKONCE)
4676 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4678 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4682 /* This function generates code for -fpic that loads %ebx with
4683 the return address of the caller and then returns. */
4686 ix86_file_end (void)
4691 for (regno = 0; regno < 8; ++regno)
4695 if (! ((pic_labels_used >> regno) & 1))
4698 get_pc_thunk_name (name, regno);
4703 switch_to_section (darwin_sections[text_coal_section]);
4704 fputs ("\t.weak_definition\t", asm_out_file);
4705 assemble_name (asm_out_file, name);
4706 fputs ("\n\t.private_extern\t", asm_out_file);
4707 assemble_name (asm_out_file, name);
4708 fputs ("\n", asm_out_file);
4709 ASM_OUTPUT_LABEL (asm_out_file, name);
4713 if (USE_HIDDEN_LINKONCE)
4717 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4719 TREE_PUBLIC (decl) = 1;
4720 TREE_STATIC (decl) = 1;
4721 DECL_ONE_ONLY (decl) = 1;
4723 (*targetm.asm_out.unique_section) (decl, 0);
4724 switch_to_section (get_named_section (decl, NULL, 0));
4726 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4727 fputs ("\t.hidden\t", asm_out_file);
4728 assemble_name (asm_out_file, name);
4729 fputc ('\n', asm_out_file);
4730 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4734 switch_to_section (text_section);
4735 ASM_OUTPUT_LABEL (asm_out_file, name);
4738 xops[0] = gen_rtx_REG (SImode, regno);
4739 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4740 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4741 output_asm_insn ("ret", xops);
4744 if (NEED_INDICATE_EXEC_STACK)
4745 file_end_indicate_exec_stack ();
4748 /* Emit code for the SET_GOT patterns. */
4751 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4756 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4758 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4760 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4763 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4765 output_asm_insn ("call\t%a2", xops);
4768 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4769 is what will be referenced by the Mach-O PIC subsystem. */
4771 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4774 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4775 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4778 output_asm_insn ("pop{l}\t%0", xops);
4783 get_pc_thunk_name (name, REGNO (dest));
4784 pic_labels_used |= 1 << REGNO (dest);
4786 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4787 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4788 output_asm_insn ("call\t%X2", xops);
4789 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4790 is what will be referenced by the Mach-O PIC subsystem. */
4793 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4795 targetm.asm_out.internal_label (asm_out_file, "L",
4796 CODE_LABEL_NUMBER (label));
4803 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4804 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4806 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4811 /* Generate an "push" pattern for input ARG. */
4816 return gen_rtx_SET (VOIDmode,
4818 gen_rtx_PRE_DEC (Pmode,
4819 stack_pointer_rtx)),
4823 /* Return >= 0 if there is an unused call-clobbered register available
4824 for the entire function. */
4827 ix86_select_alt_pic_regnum (void)
4829 if (current_function_is_leaf && !current_function_profile
4830 && !ix86_current_function_calls_tls_descriptor)
4833 for (i = 2; i >= 0; --i)
4834 if (!regs_ever_live[i])
4838 return INVALID_REGNUM;
4841 /* Return 1 if we need to save REGNO. */
4843 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4845 if (pic_offset_table_rtx
4846 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4847 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4848 || current_function_profile
4849 || current_function_calls_eh_return
4850 || current_function_uses_const_pool))
4852 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4857 if (current_function_calls_eh_return && maybe_eh_return)
4862 unsigned test = EH_RETURN_DATA_REGNO (i);
4863 if (test == INVALID_REGNUM)
4870 if (cfun->machine->force_align_arg_pointer
4871 && regno == REGNO (cfun->machine->force_align_arg_pointer))
4874 return (regs_ever_live[regno]
4875 && !call_used_regs[regno]
4876 && !fixed_regs[regno]
4877 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4880 /* Return number of registers to be saved on the stack. */
4883 ix86_nsaved_regs (void)
4888 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4889 if (ix86_save_reg (regno, true))
4894 /* Return the offset between two registers, one to be eliminated, and the other
4895 its replacement, at the start of a routine. */
4898 ix86_initial_elimination_offset (int from, int to)
4900 struct ix86_frame frame;
4901 ix86_compute_frame_layout (&frame);
4903 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4904 return frame.hard_frame_pointer_offset;
4905 else if (from == FRAME_POINTER_REGNUM
4906 && to == HARD_FRAME_POINTER_REGNUM)
4907 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4910 gcc_assert (to == STACK_POINTER_REGNUM);
4912 if (from == ARG_POINTER_REGNUM)
4913 return frame.stack_pointer_offset;
4915 gcc_assert (from == FRAME_POINTER_REGNUM);
4916 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4920 /* Fill structure ix86_frame about frame of currently computed function. */
4923 ix86_compute_frame_layout (struct ix86_frame *frame)
4925 HOST_WIDE_INT total_size;
4926 unsigned int stack_alignment_needed;
4927 HOST_WIDE_INT offset;
4928 unsigned int preferred_alignment;
4929 HOST_WIDE_INT size = get_frame_size ();
4931 frame->nregs = ix86_nsaved_regs ();
4934 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4935 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4937 /* During reload iteration the amount of registers saved can change.
4938 Recompute the value as needed. Do not recompute when amount of registers
4939 didn't change as reload does multiple calls to the function and does not
4940 expect the decision to change within single iteration. */
4942 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4944 int count = frame->nregs;
4946 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4947 /* The fast prologue uses move instead of push to save registers. This
4948 is significantly longer, but also executes faster as modern hardware
4949 can execute the moves in parallel, but can't do that for push/pop.
4951 Be careful about choosing what prologue to emit: When function takes
4952 many instructions to execute we may use slow version as well as in
4953 case function is known to be outside hot spot (this is known with
4954 feedback only). Weight the size of function by number of registers
4955 to save as it is cheap to use one or two push instructions but very
4956 slow to use many of them. */
4958 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4959 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4960 || (flag_branch_probabilities
4961 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4962 cfun->machine->use_fast_prologue_epilogue = false;
4964 cfun->machine->use_fast_prologue_epilogue
4965 = !expensive_function_p (count);
4967 if (TARGET_PROLOGUE_USING_MOVE
4968 && cfun->machine->use_fast_prologue_epilogue)
4969 frame->save_regs_using_mov = true;
4971 frame->save_regs_using_mov = false;
4974 /* Skip return address and saved base pointer. */
4975 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4977 frame->hard_frame_pointer_offset = offset;
4979 /* Do some sanity checking of stack_alignment_needed and
4980 preferred_alignment, since i386 port is the only using those features
4981 that may break easily. */
4983 gcc_assert (!size || stack_alignment_needed);
4984 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
4985 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4986 gcc_assert (stack_alignment_needed
4987 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4989 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4990 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4992 /* Register save area */
4993 offset += frame->nregs * UNITS_PER_WORD;
4996 if (ix86_save_varrargs_registers)
4998 offset += X86_64_VARARGS_SIZE;
4999 frame->va_arg_size = X86_64_VARARGS_SIZE;
5002 frame->va_arg_size = 0;
5004 /* Align start of frame for local function. */
5005 frame->padding1 = ((offset + stack_alignment_needed - 1)
5006 & -stack_alignment_needed) - offset;
5008 offset += frame->padding1;
5010 /* Frame pointer points here. */
5011 frame->frame_pointer_offset = offset;
5015 /* Add outgoing arguments area. Can be skipped if we eliminated
5016 all the function calls as dead code.
5017 Skipping is however impossible when function calls alloca. Alloca
5018 expander assumes that last current_function_outgoing_args_size
5019 of stack frame are unused. */
5020 if (ACCUMULATE_OUTGOING_ARGS
5021 && (!current_function_is_leaf || current_function_calls_alloca
5022 || ix86_current_function_calls_tls_descriptor))
5024 offset += current_function_outgoing_args_size;
5025 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5028 frame->outgoing_arguments_size = 0;
5030 /* Align stack boundary. Only needed if we're calling another function
5032 if (!current_function_is_leaf || current_function_calls_alloca
5033 || ix86_current_function_calls_tls_descriptor)
5034 frame->padding2 = ((offset + preferred_alignment - 1)
5035 & -preferred_alignment) - offset;
5037 frame->padding2 = 0;
5039 offset += frame->padding2;
5041 /* We've reached end of stack frame. */
5042 frame->stack_pointer_offset = offset;
5044 /* Size prologue needs to allocate. */
5045 frame->to_allocate =
5046 (size + frame->padding1 + frame->padding2
5047 + frame->outgoing_arguments_size + frame->va_arg_size);
5049 if ((!frame->to_allocate && frame->nregs <= 1)
5050 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5051 frame->save_regs_using_mov = false;
5053 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5054 && current_function_is_leaf
5055 && !ix86_current_function_calls_tls_descriptor)
5057 frame->red_zone_size = frame->to_allocate;
5058 if (frame->save_regs_using_mov)
5059 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5060 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5061 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5064 frame->red_zone_size = 0;
5065 frame->to_allocate -= frame->red_zone_size;
5066 frame->stack_pointer_offset -= frame->red_zone_size;
5068 fprintf (stderr, "nregs: %i\n", frame->nregs);
5069 fprintf (stderr, "size: %i\n", size);
5070 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5071 fprintf (stderr, "padding1: %i\n", frame->padding1);
5072 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5073 fprintf (stderr, "padding2: %i\n", frame->padding2);
5074 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5075 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5076 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5077 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5078 frame->hard_frame_pointer_offset);
5079 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5083 /* Emit code to save registers in the prologue. */
5086 ix86_emit_save_regs (void)
5091 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5092 if (ix86_save_reg (regno, true))
5094 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5095 RTX_FRAME_RELATED_P (insn) = 1;
5099 /* Emit code to save registers using MOV insns. First register
5100 is restored from POINTER + OFFSET. */
5102 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5107 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5108 if (ix86_save_reg (regno, true))
5110 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5112 gen_rtx_REG (Pmode, regno));
5113 RTX_FRAME_RELATED_P (insn) = 1;
5114 offset += UNITS_PER_WORD;
5118 /* Expand prologue or epilogue stack adjustment.
5119 The pattern exist to put a dependency on all ebp-based memory accesses.
5120 STYLE should be negative if instructions should be marked as frame related,
5121 zero if %r11 register is live and cannot be freely used and positive
5125 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5130 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5131 else if (x86_64_immediate_operand (offset, DImode))
5132 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5136 /* r11 is used by indirect sibcall return as well, set before the
5137 epilogue and used after the epilogue. ATM indirect sibcall
5138 shouldn't be used together with huge frame sizes in one
5139 function because of the frame_size check in sibcall.c. */
5141 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5142 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5144 RTX_FRAME_RELATED_P (insn) = 1;
5145 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5149 RTX_FRAME_RELATED_P (insn) = 1;
5152 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5155 ix86_internal_arg_pointer (void)
5157 if (FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5158 && DECL_NAME (current_function_decl)
5159 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5160 && DECL_FILE_SCOPE_P (current_function_decl))
5162 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5163 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5166 return virtual_incoming_args_rtx;
5169 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5170 This is called from dwarf2out.c to emit call frame instructions
5171 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5173 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5175 rtx unspec = SET_SRC (pattern);
5176 gcc_assert (GET_CODE (unspec) == UNSPEC);
5180 case UNSPEC_REG_SAVE:
5181 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5182 SET_DEST (pattern));
5184 case UNSPEC_DEF_CFA:
5185 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5186 INTVAL (XVECEXP (unspec, 0, 0)));
5193 /* Expand the prologue into a bunch of separate insns. */
5196 ix86_expand_prologue (void)
5200 struct ix86_frame frame;
5201 HOST_WIDE_INT allocate;
5203 ix86_compute_frame_layout (&frame);
5205 if (cfun->machine->force_align_arg_pointer)
5209 /* Grab the argument pointer. */
5210 x = plus_constant (stack_pointer_rtx, 4);
5211 y = cfun->machine->force_align_arg_pointer;
5212 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5213 RTX_FRAME_RELATED_P (insn) = 1;
5215 /* The unwind info consists of two parts: install the fafp as the cfa,
5216 and record the fafp as the "save register" of the stack pointer.
5217 The later is there in order that the unwinder can see where it
5218 should restore the stack pointer across the and insn. */
5219 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5220 x = gen_rtx_SET (VOIDmode, y, x);
5221 RTX_FRAME_RELATED_P (x) = 1;
5222 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5224 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5225 RTX_FRAME_RELATED_P (y) = 1;
5226 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5227 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5228 REG_NOTES (insn) = x;
5230 /* Align the stack. */
5231 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5234 /* And here we cheat like madmen with the unwind info. We force the
5235 cfa register back to sp+4, which is exactly what it was at the
5236 start of the function. Re-pushing the return address results in
5237 the return at the same spot relative to the cfa, and thus is
5238 correct wrt the unwind info. */
5239 x = cfun->machine->force_align_arg_pointer;
5240 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5241 insn = emit_insn (gen_push (x));
5242 RTX_FRAME_RELATED_P (insn) = 1;
5245 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5246 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5247 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5248 REG_NOTES (insn) = x;
5251 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5252 slower on all targets. Also sdb doesn't like it. */
5254 if (frame_pointer_needed)
5256 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5257 RTX_FRAME_RELATED_P (insn) = 1;
5259 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5260 RTX_FRAME_RELATED_P (insn) = 1;
5263 allocate = frame.to_allocate;
5265 if (!frame.save_regs_using_mov)
5266 ix86_emit_save_regs ();
5268 allocate += frame.nregs * UNITS_PER_WORD;
5270 /* When using red zone we may start register saving before allocating
5271 the stack frame saving one cycle of the prologue. */
5272 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5273 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5274 : stack_pointer_rtx,
5275 -frame.nregs * UNITS_PER_WORD);
5279 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5280 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5281 GEN_INT (-allocate), -1);
5284 /* Only valid for Win32. */
5285 rtx eax = gen_rtx_REG (SImode, 0);
5286 bool eax_live = ix86_eax_live_at_start_p ();
5289 gcc_assert (!TARGET_64BIT);
5293 emit_insn (gen_push (eax));
5297 emit_move_insn (eax, GEN_INT (allocate));
5299 insn = emit_insn (gen_allocate_stack_worker (eax));
5300 RTX_FRAME_RELATED_P (insn) = 1;
5301 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5302 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5303 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5304 t, REG_NOTES (insn));
5308 if (frame_pointer_needed)
5309 t = plus_constant (hard_frame_pointer_rtx,
5312 - frame.nregs * UNITS_PER_WORD);
5314 t = plus_constant (stack_pointer_rtx, allocate);
5315 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5319 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5321 if (!frame_pointer_needed || !frame.to_allocate)
5322 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5324 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5325 -frame.nregs * UNITS_PER_WORD);
5328 pic_reg_used = false;
5329 if (pic_offset_table_rtx
5330 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5331 || current_function_profile))
5333 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5335 if (alt_pic_reg_used != INVALID_REGNUM)
5336 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5338 pic_reg_used = true;
5344 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5346 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5348 /* Even with accurate pre-reload life analysis, we can wind up
5349 deleting all references to the pic register after reload.
5350 Consider if cross-jumping unifies two sides of a branch
5351 controlled by a comparison vs the only read from a global.
5352 In which case, allow the set_got to be deleted, though we're
5353 too late to do anything about the ebx save in the prologue. */
5354 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5357 /* Prevent function calls from be scheduled before the call to mcount.
5358 In the pic_reg_used case, make sure that the got load isn't deleted. */
5359 if (current_function_profile)
5360 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5363 /* Emit code to restore saved registers using MOV insns. First register
5364 is restored from POINTER + OFFSET. */
5366 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5367 int maybe_eh_return)
5370 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5372 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5373 if (ix86_save_reg (regno, maybe_eh_return))
5375 /* Ensure that adjust_address won't be forced to produce pointer
5376 out of range allowed by x86-64 instruction set. */
5377 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5381 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5382 emit_move_insn (r11, GEN_INT (offset));
5383 emit_insn (gen_adddi3 (r11, r11, pointer));
5384 base_address = gen_rtx_MEM (Pmode, r11);
5387 emit_move_insn (gen_rtx_REG (Pmode, regno),
5388 adjust_address (base_address, Pmode, offset));
5389 offset += UNITS_PER_WORD;
5393 /* Restore function stack, frame, and registers. */
5396 ix86_expand_epilogue (int style)
5399 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5400 struct ix86_frame frame;
5401 HOST_WIDE_INT offset;
5403 ix86_compute_frame_layout (&frame);
5405 /* Calculate start of saved registers relative to ebp. Special care
5406 must be taken for the normal return case of a function using
5407 eh_return: the eax and edx registers are marked as saved, but not
5408 restored along this path. */
5409 offset = frame.nregs;
5410 if (current_function_calls_eh_return && style != 2)
5412 offset *= -UNITS_PER_WORD;
5414 /* If we're only restoring one register and sp is not valid then
5415 using a move instruction to restore the register since it's
5416 less work than reloading sp and popping the register.
5418 The default code result in stack adjustment using add/lea instruction,
5419 while this code results in LEAVE instruction (or discrete equivalent),
5420 so it is profitable in some other cases as well. Especially when there
5421 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5422 and there is exactly one register to pop. This heuristic may need some
5423 tuning in future. */
5424 if ((!sp_valid && frame.nregs <= 1)
5425 || (TARGET_EPILOGUE_USING_MOVE
5426 && cfun->machine->use_fast_prologue_epilogue
5427 && (frame.nregs > 1 || frame.to_allocate))
5428 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5429 || (frame_pointer_needed && TARGET_USE_LEAVE
5430 && cfun->machine->use_fast_prologue_epilogue
5431 && frame.nregs == 1)
5432 || current_function_calls_eh_return)
5434 /* Restore registers. We can use ebp or esp to address the memory
5435 locations. If both are available, default to ebp, since offsets
5436 are known to be small. Only exception is esp pointing directly to the
5437 end of block of saved registers, where we may simplify addressing
5440 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5441 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5442 frame.to_allocate, style == 2);
5444 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5445 offset, style == 2);
5447 /* eh_return epilogues need %ecx added to the stack pointer. */
5450 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5452 if (frame_pointer_needed)
5454 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5455 tmp = plus_constant (tmp, UNITS_PER_WORD);
5456 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5458 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5459 emit_move_insn (hard_frame_pointer_rtx, tmp);
5461 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5466 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5467 tmp = plus_constant (tmp, (frame.to_allocate
5468 + frame.nregs * UNITS_PER_WORD));
5469 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5472 else if (!frame_pointer_needed)
5473 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5474 GEN_INT (frame.to_allocate
5475 + frame.nregs * UNITS_PER_WORD),
5477 /* If not an i386, mov & pop is faster than "leave". */
5478 else if (TARGET_USE_LEAVE || optimize_size
5479 || !cfun->machine->use_fast_prologue_epilogue)
5480 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5483 pro_epilogue_adjust_stack (stack_pointer_rtx,
5484 hard_frame_pointer_rtx,
5487 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5489 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5494 /* First step is to deallocate the stack frame so that we can
5495 pop the registers. */
5498 gcc_assert (frame_pointer_needed);
5499 pro_epilogue_adjust_stack (stack_pointer_rtx,
5500 hard_frame_pointer_rtx,
5501 GEN_INT (offset), style);
5503 else if (frame.to_allocate)
5504 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5505 GEN_INT (frame.to_allocate), style);
5507 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5508 if (ix86_save_reg (regno, false))
5511 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5513 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5515 if (frame_pointer_needed)
5517 /* Leave results in shorter dependency chains on CPUs that are
5518 able to grok it fast. */
5519 if (TARGET_USE_LEAVE)
5520 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5521 else if (TARGET_64BIT)
5522 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5524 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5528 if (cfun->machine->force_align_arg_pointer)
5530 emit_insn (gen_addsi3 (stack_pointer_rtx,
5531 cfun->machine->force_align_arg_pointer,
5535 /* Sibcall epilogues don't want a return instruction. */
5539 if (current_function_pops_args && current_function_args_size)
5541 rtx popc = GEN_INT (current_function_pops_args);
5543 /* i386 can only pop 64K bytes. If asked to pop more, pop
5544 return address, do explicit add, and jump indirectly to the
5547 if (current_function_pops_args >= 65536)
5549 rtx ecx = gen_rtx_REG (SImode, 2);
5551 /* There is no "pascal" calling convention in 64bit ABI. */
5552 gcc_assert (!TARGET_64BIT);
5554 emit_insn (gen_popsi1 (ecx));
5555 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5556 emit_jump_insn (gen_return_indirect_internal (ecx));
5559 emit_jump_insn (gen_return_pop_internal (popc));
5562 emit_jump_insn (gen_return_internal ());
5565 /* Reset from the function's potential modifications. */
5568 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5569 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5571 if (pic_offset_table_rtx)
5572 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5575 /* Extract the parts of an RTL expression that is a valid memory address
5576 for an instruction. Return 0 if the structure of the address is
5577 grossly off. Return -1 if the address contains ASHIFT, so it is not
5578 strictly valid, but still used for computing length of lea instruction. */
5581 ix86_decompose_address (rtx addr, struct ix86_address *out)
5583 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5584 rtx base_reg, index_reg;
5585 HOST_WIDE_INT scale = 1;
5586 rtx scale_rtx = NULL_RTX;
5588 enum ix86_address_seg seg = SEG_DEFAULT;
5590 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5592 else if (GET_CODE (addr) == PLUS)
5602 addends[n++] = XEXP (op, 1);
5605 while (GET_CODE (op) == PLUS);
5610 for (i = n; i >= 0; --i)
5613 switch (GET_CODE (op))
5618 index = XEXP (op, 0);
5619 scale_rtx = XEXP (op, 1);
5623 if (XINT (op, 1) == UNSPEC_TP
5624 && TARGET_TLS_DIRECT_SEG_REFS
5625 && seg == SEG_DEFAULT)
5626 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5655 else if (GET_CODE (addr) == MULT)
5657 index = XEXP (addr, 0); /* index*scale */
5658 scale_rtx = XEXP (addr, 1);
5660 else if (GET_CODE (addr) == ASHIFT)
5664 /* We're called for lea too, which implements ashift on occasion. */
5665 index = XEXP (addr, 0);
5666 tmp = XEXP (addr, 1);
5667 if (GET_CODE (tmp) != CONST_INT)
5669 scale = INTVAL (tmp);
5670 if ((unsigned HOST_WIDE_INT) scale > 3)
5676 disp = addr; /* displacement */
5678 /* Extract the integral value of scale. */
5681 if (GET_CODE (scale_rtx) != CONST_INT)
5683 scale = INTVAL (scale_rtx);
5686 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5687 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5689 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5690 if (base_reg && index_reg && scale == 1
5691 && (index_reg == arg_pointer_rtx
5692 || index_reg == frame_pointer_rtx
5693 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5696 tmp = base, base = index, index = tmp;
5697 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5700 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5701 if ((base_reg == hard_frame_pointer_rtx
5702 || base_reg == frame_pointer_rtx
5703 || base_reg == arg_pointer_rtx) && !disp)
5706 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5707 Avoid this by transforming to [%esi+0]. */
5708 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5709 && base_reg && !index_reg && !disp
5711 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5714 /* Special case: encode reg+reg instead of reg*2. */
5715 if (!base && index && scale && scale == 2)
5716 base = index, base_reg = index_reg, scale = 1;
5718 /* Special case: scaling cannot be encoded without base or displacement. */
5719 if (!base && !disp && index && scale != 1)
5731 /* Return cost of the memory address x.
5732 For i386, it is better to use a complex address than let gcc copy
5733 the address into a reg and make a new pseudo. But not if the address
5734 requires to two regs - that would mean more pseudos with longer
5737 ix86_address_cost (rtx x)
5739 struct ix86_address parts;
5741 int ok = ix86_decompose_address (x, &parts);
5745 if (parts.base && GET_CODE (parts.base) == SUBREG)
5746 parts.base = SUBREG_REG (parts.base);
5747 if (parts.index && GET_CODE (parts.index) == SUBREG)
5748 parts.index = SUBREG_REG (parts.index);
5750 /* More complex memory references are better. */
5751 if (parts.disp && parts.disp != const0_rtx)
5753 if (parts.seg != SEG_DEFAULT)
5756 /* Attempt to minimize number of registers in the address. */
5758 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5760 && (!REG_P (parts.index)
5761 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5765 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5767 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5768 && parts.base != parts.index)
5771 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5772 since it's predecode logic can't detect the length of instructions
5773 and it degenerates to vector decoded. Increase cost of such
5774 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5775 to split such addresses or even refuse such addresses at all.
5777 Following addressing modes are affected:
5782 The first and last case may be avoidable by explicitly coding the zero in
5783 memory address, but I don't have AMD-K6 machine handy to check this
5787 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5788 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5789 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5795 /* If X is a machine specific address (i.e. a symbol or label being
5796 referenced as a displacement from the GOT implemented using an
5797 UNSPEC), then return the base term. Otherwise return X. */
5800 ix86_find_base_term (rtx x)
5806 if (GET_CODE (x) != CONST)
5809 if (GET_CODE (term) == PLUS
5810 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5811 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5812 term = XEXP (term, 0);
5813 if (GET_CODE (term) != UNSPEC
5814 || XINT (term, 1) != UNSPEC_GOTPCREL)
5817 term = XVECEXP (term, 0, 0);
5819 if (GET_CODE (term) != SYMBOL_REF
5820 && GET_CODE (term) != LABEL_REF)
5826 term = ix86_delegitimize_address (x);
5828 if (GET_CODE (term) != SYMBOL_REF
5829 && GET_CODE (term) != LABEL_REF)
5835 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5836 this is used for to form addresses to local data when -fPIC is in
5840 darwin_local_data_pic (rtx disp)
5842 if (GET_CODE (disp) == MINUS)
5844 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5845 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5846 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5848 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5849 if (! strcmp (sym_name, "<pic base>"))
5857 /* Determine if a given RTX is a valid constant. We already know this
5858 satisfies CONSTANT_P. */
5861 legitimate_constant_p (rtx x)
5863 switch (GET_CODE (x))
5868 if (GET_CODE (x) == PLUS)
5870 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5875 if (TARGET_MACHO && darwin_local_data_pic (x))
5878 /* Only some unspecs are valid as "constants". */
5879 if (GET_CODE (x) == UNSPEC)
5880 switch (XINT (x, 1))
5883 return TARGET_64BIT;
5886 x = XVECEXP (x, 0, 0);
5887 return (GET_CODE (x) == SYMBOL_REF
5888 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5890 x = XVECEXP (x, 0, 0);
5891 return (GET_CODE (x) == SYMBOL_REF
5892 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
5897 /* We must have drilled down to a symbol. */
5898 if (GET_CODE (x) == LABEL_REF)
5900 if (GET_CODE (x) != SYMBOL_REF)
5905 /* TLS symbols are never valid. */
5906 if (SYMBOL_REF_TLS_MODEL (x))
5914 /* Otherwise we handle everything else in the move patterns. */
5918 /* Determine if it's legal to put X into the constant pool. This
5919 is not possible for the address of thread-local symbols, which
5920 is checked above. */
5923 ix86_cannot_force_const_mem (rtx x)
5925 return !legitimate_constant_p (x);
5928 /* Determine if a given RTX is a valid constant address. */
5931 constant_address_p (rtx x)
5933 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5936 /* Nonzero if the constant value X is a legitimate general operand
5937 when generating PIC code. It is given that flag_pic is on and
5938 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5941 legitimate_pic_operand_p (rtx x)
5945 switch (GET_CODE (x))
5948 inner = XEXP (x, 0);
5949 if (GET_CODE (inner) == PLUS
5950 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
5951 inner = XEXP (inner, 0);
5953 /* Only some unspecs are valid as "constants". */
5954 if (GET_CODE (inner) == UNSPEC)
5955 switch (XINT (inner, 1))
5958 return TARGET_64BIT;
5960 x = XVECEXP (inner, 0, 0);
5961 return (GET_CODE (x) == SYMBOL_REF
5962 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5970 return legitimate_pic_address_disp_p (x);
5977 /* Determine if a given CONST RTX is a valid memory displacement
5981 legitimate_pic_address_disp_p (rtx disp)
5985 /* In 64bit mode we can allow direct addresses of symbols and labels
5986 when they are not dynamic symbols. */
5989 rtx op0 = disp, op1;
5991 switch (GET_CODE (disp))
5997 if (GET_CODE (XEXP (disp, 0)) != PLUS)
5999 op0 = XEXP (XEXP (disp, 0), 0);
6000 op1 = XEXP (XEXP (disp, 0), 1);
6001 if (GET_CODE (op1) != CONST_INT
6002 || INTVAL (op1) >= 16*1024*1024
6003 || INTVAL (op1) < -16*1024*1024)
6005 if (GET_CODE (op0) == LABEL_REF)
6007 if (GET_CODE (op0) != SYMBOL_REF)
6012 /* TLS references should always be enclosed in UNSPEC. */
6013 if (SYMBOL_REF_TLS_MODEL (op0))
6015 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6023 if (GET_CODE (disp) != CONST)
6025 disp = XEXP (disp, 0);
6029 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6030 of GOT tables. We should not need these anyway. */
6031 if (GET_CODE (disp) != UNSPEC
6032 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6033 && XINT (disp, 1) != UNSPEC_GOTOFF))
6036 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6037 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6043 if (GET_CODE (disp) == PLUS)
6045 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6047 disp = XEXP (disp, 0);
6051 if (TARGET_MACHO && darwin_local_data_pic (disp))
6054 if (GET_CODE (disp) != UNSPEC)
6057 switch (XINT (disp, 1))
6062 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6064 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6065 While ABI specify also 32bit relocation but we don't produce it in
6066 small PIC model at all. */
6067 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6068 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6070 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6072 case UNSPEC_GOTTPOFF:
6073 case UNSPEC_GOTNTPOFF:
6074 case UNSPEC_INDNTPOFF:
6077 disp = XVECEXP (disp, 0, 0);
6078 return (GET_CODE (disp) == SYMBOL_REF
6079 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6081 disp = XVECEXP (disp, 0, 0);
6082 return (GET_CODE (disp) == SYMBOL_REF
6083 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6085 disp = XVECEXP (disp, 0, 0);
6086 return (GET_CODE (disp) == SYMBOL_REF
6087 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6093 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6094 memory address for an instruction. The MODE argument is the machine mode
6095 for the MEM expression that wants to use this address.
6097 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6098 convert common non-canonical forms to canonical form so that they will
6102 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6104 struct ix86_address parts;
6105 rtx base, index, disp;
6106 HOST_WIDE_INT scale;
6107 const char *reason = NULL;
6108 rtx reason_rtx = NULL_RTX;
6110 if (TARGET_DEBUG_ADDR)
6113 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6114 GET_MODE_NAME (mode), strict);
6118 if (ix86_decompose_address (addr, &parts) <= 0)
6120 reason = "decomposition failed";
6125 index = parts.index;
6127 scale = parts.scale;
6129 /* Validate base register.
6131 Don't allow SUBREG's that span more than a word here. It can lead to spill
6132 failures when the base is one word out of a two word structure, which is
6133 represented internally as a DImode int. */
6142 else if (GET_CODE (base) == SUBREG
6143 && REG_P (SUBREG_REG (base))
6144 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6146 reg = SUBREG_REG (base);
6149 reason = "base is not a register";
6153 if (GET_MODE (base) != Pmode)
6155 reason = "base is not in Pmode";
6159 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6160 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6162 reason = "base is not valid";
6167 /* Validate index register.
6169 Don't allow SUBREG's that span more than a word here -- same as above. */
6178 else if (GET_CODE (index) == SUBREG
6179 && REG_P (SUBREG_REG (index))
6180 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6182 reg = SUBREG_REG (index);
6185 reason = "index is not a register";
6189 if (GET_MODE (index) != Pmode)
6191 reason = "index is not in Pmode";
6195 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6196 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6198 reason = "index is not valid";
6203 /* Validate scale factor. */
6206 reason_rtx = GEN_INT (scale);
6209 reason = "scale without index";
6213 if (scale != 2 && scale != 4 && scale != 8)
6215 reason = "scale is not a valid multiplier";
6220 /* Validate displacement. */
6225 if (GET_CODE (disp) == CONST
6226 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6227 switch (XINT (XEXP (disp, 0), 1))
6229 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6230 used. While ABI specify also 32bit relocations, we don't produce
6231 them at all and use IP relative instead. */
6234 gcc_assert (flag_pic);
6236 goto is_legitimate_pic;
6237 reason = "64bit address unspec";
6240 case UNSPEC_GOTPCREL:
6241 gcc_assert (flag_pic);
6242 goto is_legitimate_pic;
6244 case UNSPEC_GOTTPOFF:
6245 case UNSPEC_GOTNTPOFF:
6246 case UNSPEC_INDNTPOFF:
6252 reason = "invalid address unspec";
6256 else if (flag_pic && (SYMBOLIC_CONST (disp)
6258 && !machopic_operand_p (disp)
6263 if (TARGET_64BIT && (index || base))
6265 /* foo@dtpoff(%rX) is ok. */
6266 if (GET_CODE (disp) != CONST
6267 || GET_CODE (XEXP (disp, 0)) != PLUS
6268 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6269 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6270 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6271 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6273 reason = "non-constant pic memory reference";
6277 else if (! legitimate_pic_address_disp_p (disp))
6279 reason = "displacement is an invalid pic construct";
6283 /* This code used to verify that a symbolic pic displacement
6284 includes the pic_offset_table_rtx register.
6286 While this is good idea, unfortunately these constructs may
6287 be created by "adds using lea" optimization for incorrect
6296 This code is nonsensical, but results in addressing
6297 GOT table with pic_offset_table_rtx base. We can't
6298 just refuse it easily, since it gets matched by
6299 "addsi3" pattern, that later gets split to lea in the
6300 case output register differs from input. While this
6301 can be handled by separate addsi pattern for this case
6302 that never results in lea, this seems to be easier and
6303 correct fix for crash to disable this test. */
6305 else if (GET_CODE (disp) != LABEL_REF
6306 && GET_CODE (disp) != CONST_INT
6307 && (GET_CODE (disp) != CONST
6308 || !legitimate_constant_p (disp))
6309 && (GET_CODE (disp) != SYMBOL_REF
6310 || !legitimate_constant_p (disp)))
6312 reason = "displacement is not constant";
6315 else if (TARGET_64BIT
6316 && !x86_64_immediate_operand (disp, VOIDmode))
6318 reason = "displacement is out of range";
6323 /* Everything looks valid. */
6324 if (TARGET_DEBUG_ADDR)
6325 fprintf (stderr, "Success.\n");
6329 if (TARGET_DEBUG_ADDR)
6331 fprintf (stderr, "Error: %s\n", reason);
6332 debug_rtx (reason_rtx);
6337 /* Return a unique alias set for the GOT. */
6339 static HOST_WIDE_INT
6340 ix86_GOT_alias_set (void)
6342 static HOST_WIDE_INT set = -1;
6344 set = new_alias_set ();
6348 /* Return a legitimate reference for ORIG (an address) using the
6349 register REG. If REG is 0, a new pseudo is generated.
6351 There are two types of references that must be handled:
6353 1. Global data references must load the address from the GOT, via
6354 the PIC reg. An insn is emitted to do this load, and the reg is
6357 2. Static data references, constant pool addresses, and code labels
6358 compute the address as an offset from the GOT, whose base is in
6359 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6360 differentiate them from global data objects. The returned
6361 address is the PIC reg + an unspec constant.
6363 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6364 reg also appears in the address. */
6367 legitimize_pic_address (rtx orig, rtx reg)
6375 reg = gen_reg_rtx (Pmode);
6376 /* Use the generic Mach-O PIC machinery. */
6377 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6380 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6382 else if (TARGET_64BIT
6383 && ix86_cmodel != CM_SMALL_PIC
6384 && local_symbolic_operand (addr, Pmode))
6387 /* This symbol may be referenced via a displacement from the PIC
6388 base address (@GOTOFF). */
6390 if (reload_in_progress)
6391 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6392 if (GET_CODE (addr) == CONST)
6393 addr = XEXP (addr, 0);
6394 if (GET_CODE (addr) == PLUS)
6396 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6397 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6400 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6401 new = gen_rtx_CONST (Pmode, new);
6403 tmpreg = gen_reg_rtx (Pmode);
6406 emit_move_insn (tmpreg, new);
6410 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6411 tmpreg, 1, OPTAB_DIRECT);
6414 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6416 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6418 /* This symbol may be referenced via a displacement from the PIC
6419 base address (@GOTOFF). */
6421 if (reload_in_progress)
6422 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6423 if (GET_CODE (addr) == CONST)
6424 addr = XEXP (addr, 0);
6425 if (GET_CODE (addr) == PLUS)
6427 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6428 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6431 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6432 new = gen_rtx_CONST (Pmode, new);
6433 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6437 emit_move_insn (reg, new);
6441 else if (GET_CODE (addr) == SYMBOL_REF)
6445 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6446 new = gen_rtx_CONST (Pmode, new);
6447 new = gen_const_mem (Pmode, new);
6448 set_mem_alias_set (new, ix86_GOT_alias_set ());
6451 reg = gen_reg_rtx (Pmode);
6452 /* Use directly gen_movsi, otherwise the address is loaded
6453 into register for CSE. We don't want to CSE this addresses,
6454 instead we CSE addresses from the GOT table, so skip this. */
6455 emit_insn (gen_movsi (reg, new));
6460 /* This symbol must be referenced via a load from the
6461 Global Offset Table (@GOT). */
6463 if (reload_in_progress)
6464 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6465 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6466 new = gen_rtx_CONST (Pmode, new);
6467 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6468 new = gen_const_mem (Pmode, new);
6469 set_mem_alias_set (new, ix86_GOT_alias_set ());
6472 reg = gen_reg_rtx (Pmode);
6473 emit_move_insn (reg, new);
6479 if (GET_CODE (addr) == CONST_INT
6480 && !x86_64_immediate_operand (addr, VOIDmode))
6484 emit_move_insn (reg, addr);
6488 new = force_reg (Pmode, addr);
6490 else if (GET_CODE (addr) == CONST)
6492 addr = XEXP (addr, 0);
6494 /* We must match stuff we generate before. Assume the only
6495 unspecs that can get here are ours. Not that we could do
6496 anything with them anyway.... */
6497 if (GET_CODE (addr) == UNSPEC
6498 || (GET_CODE (addr) == PLUS
6499 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6501 gcc_assert (GET_CODE (addr) == PLUS);
6503 if (GET_CODE (addr) == PLUS)
6505 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6507 /* Check first to see if this is a constant offset from a @GOTOFF
6508 symbol reference. */
6509 if (local_symbolic_operand (op0, Pmode)
6510 && GET_CODE (op1) == CONST_INT)
6514 if (reload_in_progress)
6515 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6516 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6518 new = gen_rtx_PLUS (Pmode, new, op1);
6519 new = gen_rtx_CONST (Pmode, new);
6520 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6524 emit_move_insn (reg, new);
6530 if (INTVAL (op1) < -16*1024*1024
6531 || INTVAL (op1) >= 16*1024*1024)
6533 if (!x86_64_immediate_operand (op1, Pmode))
6534 op1 = force_reg (Pmode, op1);
6535 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6541 base = legitimize_pic_address (XEXP (addr, 0), reg);
6542 new = legitimize_pic_address (XEXP (addr, 1),
6543 base == reg ? NULL_RTX : reg);
6545 if (GET_CODE (new) == CONST_INT)
6546 new = plus_constant (base, INTVAL (new));
6549 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6551 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6552 new = XEXP (new, 1);
6554 new = gen_rtx_PLUS (Pmode, base, new);
6562 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6565 get_thread_pointer (int to_reg)
6569 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6573 reg = gen_reg_rtx (Pmode);
6574 insn = gen_rtx_SET (VOIDmode, reg, tp);
6575 insn = emit_insn (insn);
6580 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6581 false if we expect this to be used for a memory address and true if
6582 we expect to load the address into a register. */
6585 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6587 rtx dest, base, off, pic, tp;
6592 case TLS_MODEL_GLOBAL_DYNAMIC:
6593 dest = gen_reg_rtx (Pmode);
6594 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6596 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6598 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6601 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6602 insns = get_insns ();
6605 emit_libcall_block (insns, dest, rax, x);
6607 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6608 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6610 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6612 if (TARGET_GNU2_TLS)
6614 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6616 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6620 case TLS_MODEL_LOCAL_DYNAMIC:
6621 base = gen_reg_rtx (Pmode);
6622 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6624 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6626 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6629 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6630 insns = get_insns ();
6633 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6634 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6635 emit_libcall_block (insns, base, rax, note);
6637 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6638 emit_insn (gen_tls_local_dynamic_base_64 (base));
6640 emit_insn (gen_tls_local_dynamic_base_32 (base));
6642 if (TARGET_GNU2_TLS)
6644 rtx x = ix86_tls_module_base ();
6646 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, base));
6648 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6651 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6652 off = gen_rtx_CONST (Pmode, off);
6654 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6657 case TLS_MODEL_INITIAL_EXEC:
6661 type = UNSPEC_GOTNTPOFF;
6665 if (reload_in_progress)
6666 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6667 pic = pic_offset_table_rtx;
6668 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6670 else if (!TARGET_ANY_GNU_TLS)
6672 pic = gen_reg_rtx (Pmode);
6673 emit_insn (gen_set_got (pic));
6674 type = UNSPEC_GOTTPOFF;
6679 type = UNSPEC_INDNTPOFF;
6682 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6683 off = gen_rtx_CONST (Pmode, off);
6685 off = gen_rtx_PLUS (Pmode, pic, off);
6686 off = gen_const_mem (Pmode, off);
6687 set_mem_alias_set (off, ix86_GOT_alias_set ());
6689 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6691 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6692 off = force_reg (Pmode, off);
6693 return gen_rtx_PLUS (Pmode, base, off);
6697 base = get_thread_pointer (true);
6698 dest = gen_reg_rtx (Pmode);
6699 emit_insn (gen_subsi3 (dest, base, off));
6703 case TLS_MODEL_LOCAL_EXEC:
6704 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6705 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6706 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6707 off = gen_rtx_CONST (Pmode, off);
6709 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6711 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6712 return gen_rtx_PLUS (Pmode, base, off);
6716 base = get_thread_pointer (true);
6717 dest = gen_reg_rtx (Pmode);
6718 emit_insn (gen_subsi3 (dest, base, off));
6729 /* Try machine-dependent ways of modifying an illegitimate address
6730 to be legitimate. If we find one, return the new, valid address.
6731 This macro is used in only one place: `memory_address' in explow.c.
6733 OLDX is the address as it was before break_out_memory_refs was called.
6734 In some cases it is useful to look at this to decide what needs to be done.
6736 MODE and WIN are passed so that this macro can use
6737 GO_IF_LEGITIMATE_ADDRESS.
6739 It is always safe for this macro to do nothing. It exists to recognize
6740 opportunities to optimize the output.
6742 For the 80386, we handle X+REG by loading X into a register R and
6743 using R+REG. R will go in a general reg and indexing will be used.
6744 However, if REG is a broken-out memory address or multiplication,
6745 nothing needs to be done because REG can certainly go in a general reg.
6747 When -fpic is used, special handling is needed for symbolic references.
6748 See comments by legitimize_pic_address in i386.c for details. */
6751 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6756 if (TARGET_DEBUG_ADDR)
6758 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6759 GET_MODE_NAME (mode));
6763 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6765 return legitimize_tls_address (x, log, false);
6766 if (GET_CODE (x) == CONST
6767 && GET_CODE (XEXP (x, 0)) == PLUS
6768 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6769 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6771 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6772 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6775 if (flag_pic && SYMBOLIC_CONST (x))
6776 return legitimize_pic_address (x, 0);
6778 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6779 if (GET_CODE (x) == ASHIFT
6780 && GET_CODE (XEXP (x, 1)) == CONST_INT
6781 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6784 log = INTVAL (XEXP (x, 1));
6785 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6786 GEN_INT (1 << log));
6789 if (GET_CODE (x) == PLUS)
6791 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6793 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6794 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6795 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6798 log = INTVAL (XEXP (XEXP (x, 0), 1));
6799 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6800 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6801 GEN_INT (1 << log));
6804 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6805 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6806 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
6809 log = INTVAL (XEXP (XEXP (x, 1), 1));
6810 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6811 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6812 GEN_INT (1 << log));
6815 /* Put multiply first if it isn't already. */
6816 if (GET_CODE (XEXP (x, 1)) == MULT)
6818 rtx tmp = XEXP (x, 0);
6819 XEXP (x, 0) = XEXP (x, 1);
6824 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6825 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6826 created by virtual register instantiation, register elimination, and
6827 similar optimizations. */
6828 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6831 x = gen_rtx_PLUS (Pmode,
6832 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6833 XEXP (XEXP (x, 1), 0)),
6834 XEXP (XEXP (x, 1), 1));
6838 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6839 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6840 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6841 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6842 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6843 && CONSTANT_P (XEXP (x, 1)))
6846 rtx other = NULL_RTX;
6848 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6850 constant = XEXP (x, 1);
6851 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6853 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6855 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6856 other = XEXP (x, 1);
6864 x = gen_rtx_PLUS (Pmode,
6865 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6866 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6867 plus_constant (other, INTVAL (constant)));
6871 if (changed && legitimate_address_p (mode, x, FALSE))
6874 if (GET_CODE (XEXP (x, 0)) == MULT)
6877 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6880 if (GET_CODE (XEXP (x, 1)) == MULT)
6883 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6887 && GET_CODE (XEXP (x, 1)) == REG
6888 && GET_CODE (XEXP (x, 0)) == REG)
6891 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6894 x = legitimize_pic_address (x, 0);
6897 if (changed && legitimate_address_p (mode, x, FALSE))
6900 if (GET_CODE (XEXP (x, 0)) == REG)
6902 rtx temp = gen_reg_rtx (Pmode);
6903 rtx val = force_operand (XEXP (x, 1), temp);
6905 emit_move_insn (temp, val);
6911 else if (GET_CODE (XEXP (x, 1)) == REG)
6913 rtx temp = gen_reg_rtx (Pmode);
6914 rtx val = force_operand (XEXP (x, 0), temp);
6916 emit_move_insn (temp, val);
6926 /* Print an integer constant expression in assembler syntax. Addition
6927 and subtraction are the only arithmetic that may appear in these
6928 expressions. FILE is the stdio stream to write to, X is the rtx, and
6929 CODE is the operand print code from the output string. */
6932 output_pic_addr_const (FILE *file, rtx x, int code)
6936 switch (GET_CODE (x))
6939 gcc_assert (flag_pic);
6944 output_addr_const (file, x);
6945 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6946 fputs ("@PLT", file);
6953 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6954 assemble_name (asm_out_file, buf);
6958 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6962 /* This used to output parentheses around the expression,
6963 but that does not work on the 386 (either ATT or BSD assembler). */
6964 output_pic_addr_const (file, XEXP (x, 0), code);
6968 if (GET_MODE (x) == VOIDmode)
6970 /* We can use %d if the number is <32 bits and positive. */
6971 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6972 fprintf (file, "0x%lx%08lx",
6973 (unsigned long) CONST_DOUBLE_HIGH (x),
6974 (unsigned long) CONST_DOUBLE_LOW (x));
6976 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6979 /* We can't handle floating point constants;
6980 PRINT_OPERAND must handle them. */
6981 output_operand_lossage ("floating constant misused");
6985 /* Some assemblers need integer constants to appear first. */
6986 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6988 output_pic_addr_const (file, XEXP (x, 0), code);
6990 output_pic_addr_const (file, XEXP (x, 1), code);
6994 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
6995 output_pic_addr_const (file, XEXP (x, 1), code);
6997 output_pic_addr_const (file, XEXP (x, 0), code);
7003 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7004 output_pic_addr_const (file, XEXP (x, 0), code);
7006 output_pic_addr_const (file, XEXP (x, 1), code);
7008 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7012 gcc_assert (XVECLEN (x, 0) == 1);
7013 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7014 switch (XINT (x, 1))
7017 fputs ("@GOT", file);
7020 fputs ("@GOTOFF", file);
7022 case UNSPEC_GOTPCREL:
7023 fputs ("@GOTPCREL(%rip)", file);
7025 case UNSPEC_GOTTPOFF:
7026 /* FIXME: This might be @TPOFF in Sun ld too. */
7027 fputs ("@GOTTPOFF", file);
7030 fputs ("@TPOFF", file);
7034 fputs ("@TPOFF", file);
7036 fputs ("@NTPOFF", file);
7039 fputs ("@DTPOFF", file);
7041 case UNSPEC_GOTNTPOFF:
7043 fputs ("@GOTTPOFF(%rip)", file);
7045 fputs ("@GOTNTPOFF", file);
7047 case UNSPEC_INDNTPOFF:
7048 fputs ("@INDNTPOFF", file);
7051 output_operand_lossage ("invalid UNSPEC as operand");
7057 output_operand_lossage ("invalid expression as operand");
7061 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7062 We need to emit DTP-relative relocations. */
7065 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7067 fputs (ASM_LONG, file);
7068 output_addr_const (file, x);
7069 fputs ("@DTPOFF", file);
7075 fputs (", 0", file);
7082 /* In the name of slightly smaller debug output, and to cater to
7083 general assembler lossage, recognize PIC+GOTOFF and turn it back
7084 into a direct symbol reference. */
7087 ix86_delegitimize_address (rtx orig_x)
7091 if (GET_CODE (x) == MEM)
7096 if (GET_CODE (x) != CONST
7097 || GET_CODE (XEXP (x, 0)) != UNSPEC
7098 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7099 || GET_CODE (orig_x) != MEM)
7101 return XVECEXP (XEXP (x, 0), 0, 0);
7104 if (GET_CODE (x) != PLUS
7105 || GET_CODE (XEXP (x, 1)) != CONST)
7108 if (GET_CODE (XEXP (x, 0)) == REG
7109 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7110 /* %ebx + GOT/GOTOFF */
7112 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7114 /* %ebx + %reg * scale + GOT/GOTOFF */
7116 if (GET_CODE (XEXP (y, 0)) == REG
7117 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
7119 else if (GET_CODE (XEXP (y, 1)) == REG
7120 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
7124 if (GET_CODE (y) != REG
7125 && GET_CODE (y) != MULT
7126 && GET_CODE (y) != ASHIFT)
7132 x = XEXP (XEXP (x, 1), 0);
7133 if (GET_CODE (x) == UNSPEC
7134 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7135 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7138 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
7139 return XVECEXP (x, 0, 0);
7142 if (GET_CODE (x) == PLUS
7143 && GET_CODE (XEXP (x, 0)) == UNSPEC
7144 && GET_CODE (XEXP (x, 1)) == CONST_INT
7145 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7146 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7147 && GET_CODE (orig_x) != MEM)))
7149 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7151 return gen_rtx_PLUS (Pmode, y, x);
7155 if (TARGET_MACHO && darwin_local_data_pic (x)
7156 && GET_CODE (orig_x) != MEM)
7160 return gen_rtx_PLUS (Pmode, y, x);
7167 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7172 if (mode == CCFPmode || mode == CCFPUmode)
7174 enum rtx_code second_code, bypass_code;
7175 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7176 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7177 code = ix86_fp_compare_code_to_integer (code);
7181 code = reverse_condition (code);
7192 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7196 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7197 Those same assemblers have the same but opposite lossage on cmov. */
7198 gcc_assert (mode == CCmode);
7199 suffix = fp ? "nbe" : "a";
7219 gcc_assert (mode == CCmode);
7241 gcc_assert (mode == CCmode);
7242 suffix = fp ? "nb" : "ae";
7245 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7249 gcc_assert (mode == CCmode);
7253 suffix = fp ? "u" : "p";
7256 suffix = fp ? "nu" : "np";
7261 fputs (suffix, file);
7264 /* Print the name of register X to FILE based on its machine mode and number.
7265 If CODE is 'w', pretend the mode is HImode.
7266 If CODE is 'b', pretend the mode is QImode.
7267 If CODE is 'k', pretend the mode is SImode.
7268 If CODE is 'q', pretend the mode is DImode.
7269 If CODE is 'h', pretend the reg is the 'high' byte register.
7270 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7273 print_reg (rtx x, int code, FILE *file)
7275 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7276 && REGNO (x) != FRAME_POINTER_REGNUM
7277 && REGNO (x) != FLAGS_REG
7278 && REGNO (x) != FPSR_REG);
7280 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7283 if (code == 'w' || MMX_REG_P (x))
7285 else if (code == 'b')
7287 else if (code == 'k')
7289 else if (code == 'q')
7291 else if (code == 'y')
7293 else if (code == 'h')
7296 code = GET_MODE_SIZE (GET_MODE (x));
7298 /* Irritatingly, AMD extended registers use different naming convention
7299 from the normal registers. */
7300 if (REX_INT_REG_P (x))
7302 gcc_assert (TARGET_64BIT);
7306 error ("extended registers have no high halves");
7309 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7312 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7315 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7318 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7321 error ("unsupported operand size for extended register");
7329 if (STACK_TOP_P (x))
7331 fputs ("st(0)", file);
7338 if (! ANY_FP_REG_P (x))
7339 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7344 fputs (hi_reg_name[REGNO (x)], file);
7347 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7349 fputs (qi_reg_name[REGNO (x)], file);
7352 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7354 fputs (qi_high_reg_name[REGNO (x)], file);
7361 /* Locate some local-dynamic symbol still in use by this function
7362 so that we can print its name in some tls_local_dynamic_base
7366 get_some_local_dynamic_name (void)
7370 if (cfun->machine->some_ld_name)
7371 return cfun->machine->some_ld_name;
7373 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7375 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7376 return cfun->machine->some_ld_name;
7382 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7386 if (GET_CODE (x) == SYMBOL_REF
7387 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7389 cfun->machine->some_ld_name = XSTR (x, 0);
7397 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7398 C -- print opcode suffix for set/cmov insn.
7399 c -- like C, but print reversed condition
7400 F,f -- likewise, but for floating-point.
7401 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7403 R -- print the prefix for register names.
7404 z -- print the opcode suffix for the size of the current operand.
7405 * -- print a star (in certain assembler syntax)
7406 A -- print an absolute memory reference.
7407 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7408 s -- print a shift double count, followed by the assemblers argument
7410 b -- print the QImode name of the register for the indicated operand.
7411 %b0 would print %al if operands[0] is reg 0.
7412 w -- likewise, print the HImode name of the register.
7413 k -- likewise, print the SImode name of the register.
7414 q -- likewise, print the DImode name of the register.
7415 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7416 y -- print "st(0)" instead of "st" as a register.
7417 D -- print condition for SSE cmp instruction.
7418 P -- if PIC, print an @PLT suffix.
7419 X -- don't print any sort of PIC '@' suffix for a symbol.
7420 & -- print some in-use local-dynamic symbol name.
7421 H -- print a memory address offset by 8; used for sse high-parts
7425 print_operand (FILE *file, rtx x, int code)
7432 if (ASSEMBLER_DIALECT == ASM_ATT)
7437 assemble_name (file, get_some_local_dynamic_name ());
7441 switch (ASSEMBLER_DIALECT)
7448 /* Intel syntax. For absolute addresses, registers should not
7449 be surrounded by braces. */
7450 if (GET_CODE (x) != REG)
7453 PRINT_OPERAND (file, x, 0);
7463 PRINT_OPERAND (file, x, 0);
7468 if (ASSEMBLER_DIALECT == ASM_ATT)
7473 if (ASSEMBLER_DIALECT == ASM_ATT)
7478 if (ASSEMBLER_DIALECT == ASM_ATT)
7483 if (ASSEMBLER_DIALECT == ASM_ATT)
7488 if (ASSEMBLER_DIALECT == ASM_ATT)
7493 if (ASSEMBLER_DIALECT == ASM_ATT)
7498 /* 387 opcodes don't get size suffixes if the operands are
7500 if (STACK_REG_P (x))
7503 /* Likewise if using Intel opcodes. */
7504 if (ASSEMBLER_DIALECT == ASM_INTEL)
7507 /* This is the size of op from size of operand. */
7508 switch (GET_MODE_SIZE (GET_MODE (x)))
7511 #ifdef HAVE_GAS_FILDS_FISTS
7517 if (GET_MODE (x) == SFmode)
7532 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7534 #ifdef GAS_MNEMONICS
7560 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7562 PRINT_OPERAND (file, x, 0);
7568 /* Little bit of braindamage here. The SSE compare instructions
7569 does use completely different names for the comparisons that the
7570 fp conditional moves. */
7571 switch (GET_CODE (x))
7586 fputs ("unord", file);
7590 fputs ("neq", file);
7594 fputs ("nlt", file);
7598 fputs ("nle", file);
7601 fputs ("ord", file);
7608 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7609 if (ASSEMBLER_DIALECT == ASM_ATT)
7611 switch (GET_MODE (x))
7613 case HImode: putc ('w', file); break;
7615 case SFmode: putc ('l', file); break;
7617 case DFmode: putc ('q', file); break;
7618 default: gcc_unreachable ();
7625 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7628 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7629 if (ASSEMBLER_DIALECT == ASM_ATT)
7632 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7635 /* Like above, but reverse condition */
7637 /* Check to see if argument to %c is really a constant
7638 and not a condition code which needs to be reversed. */
7639 if (!COMPARISON_P (x))
7641 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7644 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7647 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7648 if (ASSEMBLER_DIALECT == ASM_ATT)
7651 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7655 /* It doesn't actually matter what mode we use here, as we're
7656 only going to use this for printing. */
7657 x = adjust_address_nv (x, DImode, 8);
7664 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7667 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7670 int pred_val = INTVAL (XEXP (x, 0));
7672 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7673 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7675 int taken = pred_val > REG_BR_PROB_BASE / 2;
7676 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7678 /* Emit hints only in the case default branch prediction
7679 heuristics would fail. */
7680 if (taken != cputaken)
7682 /* We use 3e (DS) prefix for taken branches and
7683 2e (CS) prefix for not taken branches. */
7685 fputs ("ds ; ", file);
7687 fputs ("cs ; ", file);
7694 output_operand_lossage ("invalid operand code '%c'", code);
7698 if (GET_CODE (x) == REG)
7699 print_reg (x, code, file);
7701 else if (GET_CODE (x) == MEM)
7703 /* No `byte ptr' prefix for call instructions. */
7704 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7707 switch (GET_MODE_SIZE (GET_MODE (x)))
7709 case 1: size = "BYTE"; break;
7710 case 2: size = "WORD"; break;
7711 case 4: size = "DWORD"; break;
7712 case 8: size = "QWORD"; break;
7713 case 12: size = "XWORD"; break;
7714 case 16: size = "XMMWORD"; break;
7719 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7722 else if (code == 'w')
7724 else if (code == 'k')
7728 fputs (" PTR ", file);
7732 /* Avoid (%rip) for call operands. */
7733 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7734 && GET_CODE (x) != CONST_INT)
7735 output_addr_const (file, x);
7736 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7737 output_operand_lossage ("invalid constraints for operand");
7742 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7747 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7748 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7750 if (ASSEMBLER_DIALECT == ASM_ATT)
7752 fprintf (file, "0x%08lx", l);
7755 /* These float cases don't actually occur as immediate operands. */
7756 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7760 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7761 fprintf (file, "%s", dstr);
7764 else if (GET_CODE (x) == CONST_DOUBLE
7765 && GET_MODE (x) == XFmode)
7769 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7770 fprintf (file, "%s", dstr);
7775 /* We have patterns that allow zero sets of memory, for instance.
7776 In 64-bit mode, we should probably support all 8-byte vectors,
7777 since we can in fact encode that into an immediate. */
7778 if (GET_CODE (x) == CONST_VECTOR)
7780 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
7786 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7788 if (ASSEMBLER_DIALECT == ASM_ATT)
7791 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7792 || GET_CODE (x) == LABEL_REF)
7794 if (ASSEMBLER_DIALECT == ASM_ATT)
7797 fputs ("OFFSET FLAT:", file);
7800 if (GET_CODE (x) == CONST_INT)
7801 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7803 output_pic_addr_const (file, x, code);
7805 output_addr_const (file, x);
7809 /* Print a memory operand whose address is ADDR. */
7812 print_operand_address (FILE *file, rtx addr)
7814 struct ix86_address parts;
7815 rtx base, index, disp;
7817 int ok = ix86_decompose_address (addr, &parts);
7822 index = parts.index;
7824 scale = parts.scale;
7832 if (USER_LABEL_PREFIX[0] == 0)
7834 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7840 if (!base && !index)
7842 /* Displacement only requires special attention. */
7844 if (GET_CODE (disp) == CONST_INT)
7846 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7848 if (USER_LABEL_PREFIX[0] == 0)
7850 fputs ("ds:", file);
7852 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7855 output_pic_addr_const (file, disp, 0);
7857 output_addr_const (file, disp);
7859 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7862 if (GET_CODE (disp) == CONST
7863 && GET_CODE (XEXP (disp, 0)) == PLUS
7864 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7865 disp = XEXP (XEXP (disp, 0), 0);
7866 if (GET_CODE (disp) == LABEL_REF
7867 || (GET_CODE (disp) == SYMBOL_REF
7868 && SYMBOL_REF_TLS_MODEL (disp) == 0))
7869 fputs ("(%rip)", file);
7874 if (ASSEMBLER_DIALECT == ASM_ATT)
7879 output_pic_addr_const (file, disp, 0);
7880 else if (GET_CODE (disp) == LABEL_REF)
7881 output_asm_label (disp);
7883 output_addr_const (file, disp);
7888 print_reg (base, 0, file);
7892 print_reg (index, 0, file);
7894 fprintf (file, ",%d", scale);
7900 rtx offset = NULL_RTX;
7904 /* Pull out the offset of a symbol; print any symbol itself. */
7905 if (GET_CODE (disp) == CONST
7906 && GET_CODE (XEXP (disp, 0)) == PLUS
7907 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7909 offset = XEXP (XEXP (disp, 0), 1);
7910 disp = gen_rtx_CONST (VOIDmode,
7911 XEXP (XEXP (disp, 0), 0));
7915 output_pic_addr_const (file, disp, 0);
7916 else if (GET_CODE (disp) == LABEL_REF)
7917 output_asm_label (disp);
7918 else if (GET_CODE (disp) == CONST_INT)
7921 output_addr_const (file, disp);
7927 print_reg (base, 0, file);
7930 if (INTVAL (offset) >= 0)
7932 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7936 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7943 print_reg (index, 0, file);
7945 fprintf (file, "*%d", scale);
7953 output_addr_const_extra (FILE *file, rtx x)
7957 if (GET_CODE (x) != UNSPEC)
7960 op = XVECEXP (x, 0, 0);
7961 switch (XINT (x, 1))
7963 case UNSPEC_GOTTPOFF:
7964 output_addr_const (file, op);
7965 /* FIXME: This might be @TPOFF in Sun ld. */
7966 fputs ("@GOTTPOFF", file);
7969 output_addr_const (file, op);
7970 fputs ("@TPOFF", file);
7973 output_addr_const (file, op);
7975 fputs ("@TPOFF", file);
7977 fputs ("@NTPOFF", file);
7980 output_addr_const (file, op);
7981 fputs ("@DTPOFF", file);
7983 case UNSPEC_GOTNTPOFF:
7984 output_addr_const (file, op);
7986 fputs ("@GOTTPOFF(%rip)", file);
7988 fputs ("@GOTNTPOFF", file);
7990 case UNSPEC_INDNTPOFF:
7991 output_addr_const (file, op);
7992 fputs ("@INDNTPOFF", file);
8002 /* Split one or more DImode RTL references into pairs of SImode
8003 references. The RTL can be REG, offsettable MEM, integer constant, or
8004 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8005 split and "num" is its length. lo_half and hi_half are output arrays
8006 that parallel "operands". */
8009 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8013 rtx op = operands[num];
8015 /* simplify_subreg refuse to split volatile memory addresses,
8016 but we still have to handle it. */
8017 if (GET_CODE (op) == MEM)
8019 lo_half[num] = adjust_address (op, SImode, 0);
8020 hi_half[num] = adjust_address (op, SImode, 4);
8024 lo_half[num] = simplify_gen_subreg (SImode, op,
8025 GET_MODE (op) == VOIDmode
8026 ? DImode : GET_MODE (op), 0);
8027 hi_half[num] = simplify_gen_subreg (SImode, op,
8028 GET_MODE (op) == VOIDmode
8029 ? DImode : GET_MODE (op), 4);
8033 /* Split one or more TImode RTL references into pairs of DImode
8034 references. The RTL can be REG, offsettable MEM, integer constant, or
8035 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8036 split and "num" is its length. lo_half and hi_half are output arrays
8037 that parallel "operands". */
8040 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8044 rtx op = operands[num];
8046 /* simplify_subreg refuse to split volatile memory addresses, but we
8047 still have to handle it. */
8048 if (GET_CODE (op) == MEM)
8050 lo_half[num] = adjust_address (op, DImode, 0);
8051 hi_half[num] = adjust_address (op, DImode, 8);
8055 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8056 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8061 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8062 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8063 is the expression of the binary operation. The output may either be
8064 emitted here, or returned to the caller, like all output_* functions.
8066 There is no guarantee that the operands are the same mode, as they
8067 might be within FLOAT or FLOAT_EXTEND expressions. */
8069 #ifndef SYSV386_COMPAT
8070 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8071 wants to fix the assemblers because that causes incompatibility
8072 with gcc. No-one wants to fix gcc because that causes
8073 incompatibility with assemblers... You can use the option of
8074 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8075 #define SYSV386_COMPAT 1
8079 output_387_binary_op (rtx insn, rtx *operands)
8081 static char buf[30];
8084 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8086 #ifdef ENABLE_CHECKING
8087 /* Even if we do not want to check the inputs, this documents input
8088 constraints. Which helps in understanding the following code. */
8089 if (STACK_REG_P (operands[0])
8090 && ((REG_P (operands[1])
8091 && REGNO (operands[0]) == REGNO (operands[1])
8092 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8093 || (REG_P (operands[2])
8094 && REGNO (operands[0]) == REGNO (operands[2])
8095 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8096 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8099 gcc_assert (is_sse);
8102 switch (GET_CODE (operands[3]))
8105 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8106 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8114 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8115 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8123 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8124 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8132 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8133 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8147 if (GET_MODE (operands[0]) == SFmode)
8148 strcat (buf, "ss\t{%2, %0|%0, %2}");
8150 strcat (buf, "sd\t{%2, %0|%0, %2}");
8155 switch (GET_CODE (operands[3]))
8159 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8161 rtx temp = operands[2];
8162 operands[2] = operands[1];
8166 /* know operands[0] == operands[1]. */
8168 if (GET_CODE (operands[2]) == MEM)
8174 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8176 if (STACK_TOP_P (operands[0]))
8177 /* How is it that we are storing to a dead operand[2]?
8178 Well, presumably operands[1] is dead too. We can't
8179 store the result to st(0) as st(0) gets popped on this
8180 instruction. Instead store to operands[2] (which I
8181 think has to be st(1)). st(1) will be popped later.
8182 gcc <= 2.8.1 didn't have this check and generated
8183 assembly code that the Unixware assembler rejected. */
8184 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8186 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8190 if (STACK_TOP_P (operands[0]))
8191 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8193 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8198 if (GET_CODE (operands[1]) == MEM)
8204 if (GET_CODE (operands[2]) == MEM)
8210 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8213 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8214 derived assemblers, confusingly reverse the direction of
8215 the operation for fsub{r} and fdiv{r} when the
8216 destination register is not st(0). The Intel assembler
8217 doesn't have this brain damage. Read !SYSV386_COMPAT to
8218 figure out what the hardware really does. */
8219 if (STACK_TOP_P (operands[0]))
8220 p = "{p\t%0, %2|rp\t%2, %0}";
8222 p = "{rp\t%2, %0|p\t%0, %2}";
8224 if (STACK_TOP_P (operands[0]))
8225 /* As above for fmul/fadd, we can't store to st(0). */
8226 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8228 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8233 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8236 if (STACK_TOP_P (operands[0]))
8237 p = "{rp\t%0, %1|p\t%1, %0}";
8239 p = "{p\t%1, %0|rp\t%0, %1}";
8241 if (STACK_TOP_P (operands[0]))
8242 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8244 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8249 if (STACK_TOP_P (operands[0]))
8251 if (STACK_TOP_P (operands[1]))
8252 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8254 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8257 else if (STACK_TOP_P (operands[1]))
8260 p = "{\t%1, %0|r\t%0, %1}";
8262 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8268 p = "{r\t%2, %0|\t%0, %2}";
8270 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8283 /* Return needed mode for entity in optimize_mode_switching pass. */
8286 ix86_mode_needed (int entity, rtx insn)
8288 enum attr_i387_cw mode;
8290 /* The mode UNINITIALIZED is used to store control word after a
8291 function call or ASM pattern. The mode ANY specify that function
8292 has no requirements on the control word and make no changes in the
8293 bits we are interested in. */
8296 || (NONJUMP_INSN_P (insn)
8297 && (asm_noperands (PATTERN (insn)) >= 0
8298 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8299 return I387_CW_UNINITIALIZED;
8301 if (recog_memoized (insn) < 0)
8304 mode = get_attr_i387_cw (insn);
8309 if (mode == I387_CW_TRUNC)
8314 if (mode == I387_CW_FLOOR)
8319 if (mode == I387_CW_CEIL)
8324 if (mode == I387_CW_MASK_PM)
8335 /* Output code to initialize control word copies used by trunc?f?i and
8336 rounding patterns. CURRENT_MODE is set to current control word,
8337 while NEW_MODE is set to new control word. */
8340 emit_i387_cw_initialization (int mode)
8342 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8347 rtx reg = gen_reg_rtx (HImode);
8349 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8350 emit_move_insn (reg, stored_mode);
8352 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8357 /* round toward zero (truncate) */
8358 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8359 slot = SLOT_CW_TRUNC;
8363 /* round down toward -oo */
8364 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8365 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8366 slot = SLOT_CW_FLOOR;
8370 /* round up toward +oo */
8371 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8372 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8373 slot = SLOT_CW_CEIL;
8376 case I387_CW_MASK_PM:
8377 /* mask precision exception for nearbyint() */
8378 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8379 slot = SLOT_CW_MASK_PM;
8391 /* round toward zero (truncate) */
8392 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8393 slot = SLOT_CW_TRUNC;
8397 /* round down toward -oo */
8398 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8399 slot = SLOT_CW_FLOOR;
8403 /* round up toward +oo */
8404 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8405 slot = SLOT_CW_CEIL;
8408 case I387_CW_MASK_PM:
8409 /* mask precision exception for nearbyint() */
8410 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8411 slot = SLOT_CW_MASK_PM;
8419 gcc_assert (slot < MAX_386_STACK_LOCALS);
8421 new_mode = assign_386_stack_local (HImode, slot);
8422 emit_move_insn (new_mode, reg);
8425 /* Output code for INSN to convert a float to a signed int. OPERANDS
8426 are the insn operands. The output may be [HSD]Imode and the input
8427 operand may be [SDX]Fmode. */
8430 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8432 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8433 int dimode_p = GET_MODE (operands[0]) == DImode;
8434 int round_mode = get_attr_i387_cw (insn);
8436 /* Jump through a hoop or two for DImode, since the hardware has no
8437 non-popping instruction. We used to do this a different way, but
8438 that was somewhat fragile and broke with post-reload splitters. */
8439 if ((dimode_p || fisttp) && !stack_top_dies)
8440 output_asm_insn ("fld\t%y1", operands);
8442 gcc_assert (STACK_TOP_P (operands[1]));
8443 gcc_assert (GET_CODE (operands[0]) == MEM);
8446 output_asm_insn ("fisttp%z0\t%0", operands);
8449 if (round_mode != I387_CW_ANY)
8450 output_asm_insn ("fldcw\t%3", operands);
8451 if (stack_top_dies || dimode_p)
8452 output_asm_insn ("fistp%z0\t%0", operands);
8454 output_asm_insn ("fist%z0\t%0", operands);
8455 if (round_mode != I387_CW_ANY)
8456 output_asm_insn ("fldcw\t%2", operands);
8462 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8463 should be used. UNORDERED_P is true when fucom should be used. */
8466 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8469 rtx cmp_op0, cmp_op1;
8470 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8474 cmp_op0 = operands[0];
8475 cmp_op1 = operands[1];
8479 cmp_op0 = operands[1];
8480 cmp_op1 = operands[2];
8485 if (GET_MODE (operands[0]) == SFmode)
8487 return "ucomiss\t{%1, %0|%0, %1}";
8489 return "comiss\t{%1, %0|%0, %1}";
8492 return "ucomisd\t{%1, %0|%0, %1}";
8494 return "comisd\t{%1, %0|%0, %1}";
8497 gcc_assert (STACK_TOP_P (cmp_op0));
8499 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8501 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8505 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8506 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
8509 return "ftst\n\tfnstsw\t%0";
8512 if (STACK_REG_P (cmp_op1)
8514 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8515 && REGNO (cmp_op1) != FIRST_STACK_REG)
8517 /* If both the top of the 387 stack dies, and the other operand
8518 is also a stack register that dies, then this must be a
8519 `fcompp' float compare */
8523 /* There is no double popping fcomi variant. Fortunately,
8524 eflags is immune from the fstp's cc clobbering. */
8526 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8528 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8529 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
8534 return "fucompp\n\tfnstsw\t%0";
8536 return "fcompp\n\tfnstsw\t%0";
8541 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8543 static const char * const alt[16] =
8545 "fcom%z2\t%y2\n\tfnstsw\t%0",
8546 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8547 "fucom%z2\t%y2\n\tfnstsw\t%0",
8548 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8550 "ficom%z2\t%y2\n\tfnstsw\t%0",
8551 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8555 "fcomi\t{%y1, %0|%0, %y1}",
8556 "fcomip\t{%y1, %0|%0, %y1}",
8557 "fucomi\t{%y1, %0|%0, %y1}",
8558 "fucomip\t{%y1, %0|%0, %y1}",
8569 mask = eflags_p << 3;
8570 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8571 mask |= unordered_p << 1;
8572 mask |= stack_top_dies;
8574 gcc_assert (mask < 16);
8583 ix86_output_addr_vec_elt (FILE *file, int value)
8585 const char *directive = ASM_LONG;
8589 directive = ASM_QUAD;
8591 gcc_assert (!TARGET_64BIT);
8594 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8598 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8601 fprintf (file, "%s%s%d-%s%d\n",
8602 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8603 else if (HAVE_AS_GOTOFF_IN_DATA)
8604 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8606 else if (TARGET_MACHO)
8608 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8609 machopic_output_function_base_name (file);
8610 fprintf(file, "\n");
8614 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8615 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8618 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8622 ix86_expand_clear (rtx dest)
8626 /* We play register width games, which are only valid after reload. */
8627 gcc_assert (reload_completed);
8629 /* Avoid HImode and its attendant prefix byte. */
8630 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8631 dest = gen_rtx_REG (SImode, REGNO (dest));
8633 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8635 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8636 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8638 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8639 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8645 /* X is an unchanging MEM. If it is a constant pool reference, return
8646 the constant pool rtx, else NULL. */
8649 maybe_get_pool_constant (rtx x)
8651 x = ix86_delegitimize_address (XEXP (x, 0));
8653 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8654 return get_pool_constant (x);
8660 ix86_expand_move (enum machine_mode mode, rtx operands[])
8662 int strict = (reload_in_progress || reload_completed);
8664 enum tls_model model;
8669 if (GET_CODE (op1) == SYMBOL_REF)
8671 model = SYMBOL_REF_TLS_MODEL (op1);
8674 op1 = legitimize_tls_address (op1, model, true);
8675 op1 = force_operand (op1, op0);
8680 else if (GET_CODE (op1) == CONST
8681 && GET_CODE (XEXP (op1, 0)) == PLUS
8682 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8684 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8687 rtx addend = XEXP (XEXP (op1, 0), 1);
8688 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8689 op1 = force_operand (op1, NULL);
8690 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8691 op0, 1, OPTAB_DIRECT);
8697 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8702 rtx temp = ((reload_in_progress
8703 || ((op0 && GET_CODE (op0) == REG)
8705 ? op0 : gen_reg_rtx (Pmode));
8706 op1 = machopic_indirect_data_reference (op1, temp);
8707 op1 = machopic_legitimize_pic_address (op1, mode,
8708 temp == op1 ? 0 : temp);
8710 else if (MACHOPIC_INDIRECT)
8711 op1 = machopic_indirect_data_reference (op1, 0);
8715 if (GET_CODE (op0) == MEM)
8716 op1 = force_reg (Pmode, op1);
8718 op1 = legitimize_address (op1, op1, Pmode);
8719 #endif /* TARGET_MACHO */
8723 if (GET_CODE (op0) == MEM
8724 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8725 || !push_operand (op0, mode))
8726 && GET_CODE (op1) == MEM)
8727 op1 = force_reg (mode, op1);
8729 if (push_operand (op0, mode)
8730 && ! general_no_elim_operand (op1, mode))
8731 op1 = copy_to_mode_reg (mode, op1);
8733 /* Force large constants in 64bit compilation into register
8734 to get them CSEed. */
8735 if (TARGET_64BIT && mode == DImode
8736 && immediate_operand (op1, mode)
8737 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8738 && !register_operand (op0, mode)
8739 && optimize && !reload_completed && !reload_in_progress)
8740 op1 = copy_to_mode_reg (mode, op1);
8742 if (FLOAT_MODE_P (mode))
8744 /* If we are loading a floating point constant to a register,
8745 force the value to memory now, since we'll get better code
8746 out the back end. */
8750 else if (GET_CODE (op1) == CONST_DOUBLE)
8752 op1 = validize_mem (force_const_mem (mode, op1));
8753 if (!register_operand (op0, mode))
8755 rtx temp = gen_reg_rtx (mode);
8756 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8757 emit_move_insn (op0, temp);
8764 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8768 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8770 rtx op0 = operands[0], op1 = operands[1];
8772 /* Force constants other than zero into memory. We do not know how
8773 the instructions used to build constants modify the upper 64 bits
8774 of the register, once we have that information we may be able
8775 to handle some of them more efficiently. */
8776 if ((reload_in_progress | reload_completed) == 0
8777 && register_operand (op0, mode)
8778 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
8779 op1 = validize_mem (force_const_mem (mode, op1));
8781 /* Make operand1 a register if it isn't already. */
8783 && !register_operand (op0, mode)
8784 && !register_operand (op1, mode))
8786 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
8790 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8793 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
8794 straight to ix86_expand_vector_move. */
8797 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
8806 /* If we're optimizing for size, movups is the smallest. */
8809 op0 = gen_lowpart (V4SFmode, op0);
8810 op1 = gen_lowpart (V4SFmode, op1);
8811 emit_insn (gen_sse_movups (op0, op1));
8815 /* ??? If we have typed data, then it would appear that using
8816 movdqu is the only way to get unaligned data loaded with
8818 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8820 op0 = gen_lowpart (V16QImode, op0);
8821 op1 = gen_lowpart (V16QImode, op1);
8822 emit_insn (gen_sse2_movdqu (op0, op1));
8826 if (TARGET_SSE2 && mode == V2DFmode)
8830 /* When SSE registers are split into halves, we can avoid
8831 writing to the top half twice. */
8832 if (TARGET_SSE_SPLIT_REGS)
8834 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8839 /* ??? Not sure about the best option for the Intel chips.
8840 The following would seem to satisfy; the register is
8841 entirely cleared, breaking the dependency chain. We
8842 then store to the upper half, with a dependency depth
8843 of one. A rumor has it that Intel recommends two movsd
8844 followed by an unpacklpd, but this is unconfirmed. And
8845 given that the dependency depth of the unpacklpd would
8846 still be one, I'm not sure why this would be better. */
8847 zero = CONST0_RTX (V2DFmode);
8850 m = adjust_address (op1, DFmode, 0);
8851 emit_insn (gen_sse2_loadlpd (op0, zero, m));
8852 m = adjust_address (op1, DFmode, 8);
8853 emit_insn (gen_sse2_loadhpd (op0, op0, m));
8857 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
8858 emit_move_insn (op0, CONST0_RTX (mode));
8860 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8862 if (mode != V4SFmode)
8863 op0 = gen_lowpart (V4SFmode, op0);
8864 m = adjust_address (op1, V2SFmode, 0);
8865 emit_insn (gen_sse_loadlps (op0, op0, m));
8866 m = adjust_address (op1, V2SFmode, 8);
8867 emit_insn (gen_sse_loadhps (op0, op0, m));
8870 else if (MEM_P (op0))
8872 /* If we're optimizing for size, movups is the smallest. */
8875 op0 = gen_lowpart (V4SFmode, op0);
8876 op1 = gen_lowpart (V4SFmode, op1);
8877 emit_insn (gen_sse_movups (op0, op1));
8881 /* ??? Similar to above, only less clear because of quote
8882 typeless stores unquote. */
8883 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
8884 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8886 op0 = gen_lowpart (V16QImode, op0);
8887 op1 = gen_lowpart (V16QImode, op1);
8888 emit_insn (gen_sse2_movdqu (op0, op1));
8892 if (TARGET_SSE2 && mode == V2DFmode)
8894 m = adjust_address (op0, DFmode, 0);
8895 emit_insn (gen_sse2_storelpd (m, op1));
8896 m = adjust_address (op0, DFmode, 8);
8897 emit_insn (gen_sse2_storehpd (m, op1));
8901 if (mode != V4SFmode)
8902 op1 = gen_lowpart (V4SFmode, op1);
8903 m = adjust_address (op0, V2SFmode, 0);
8904 emit_insn (gen_sse_storelps (m, op1));
8905 m = adjust_address (op0, V2SFmode, 8);
8906 emit_insn (gen_sse_storehps (m, op1));
8913 /* Expand a push in MODE. This is some mode for which we do not support
8914 proper push instructions, at least from the registers that we expect
8915 the value to live in. */
8918 ix86_expand_push (enum machine_mode mode, rtx x)
8922 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
8923 GEN_INT (-GET_MODE_SIZE (mode)),
8924 stack_pointer_rtx, 1, OPTAB_DIRECT);
8925 if (tmp != stack_pointer_rtx)
8926 emit_move_insn (stack_pointer_rtx, tmp);
8928 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
8929 emit_move_insn (tmp, x);
8932 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
8933 destination to use for the operation. If different from the true
8934 destination in operands[0], a copy operation will be required. */
8937 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
8940 int matching_memory;
8941 rtx src1, src2, dst;
8947 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8948 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8949 && (rtx_equal_p (dst, src2)
8950 || immediate_operand (src1, mode)))
8957 /* If the destination is memory, and we do not have matching source
8958 operands, do things in registers. */
8959 matching_memory = 0;
8960 if (GET_CODE (dst) == MEM)
8962 if (rtx_equal_p (dst, src1))
8963 matching_memory = 1;
8964 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8965 && rtx_equal_p (dst, src2))
8966 matching_memory = 2;
8968 dst = gen_reg_rtx (mode);
8971 /* Both source operands cannot be in memory. */
8972 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8974 if (matching_memory != 2)
8975 src2 = force_reg (mode, src2);
8977 src1 = force_reg (mode, src1);
8980 /* If the operation is not commutable, source 1 cannot be a constant
8981 or non-matching memory. */
8982 if ((CONSTANT_P (src1)
8983 || (!matching_memory && GET_CODE (src1) == MEM))
8984 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8985 src1 = force_reg (mode, src1);
8987 src1 = operands[1] = src1;
8988 src2 = operands[2] = src2;
8992 /* Similarly, but assume that the destination has already been
8996 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
8997 enum machine_mode mode, rtx operands[])
8999 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9000 gcc_assert (dst == operands[0]);
9003 /* Attempt to expand a binary operator. Make the expansion closer to the
9004 actual machine, then just general_operand, which will allow 3 separate
9005 memory references (one output, two input) in a single insn. */
9008 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9011 rtx src1, src2, dst, op, clob;
9013 dst = ix86_fixup_binary_operands (code, mode, operands);
9017 /* Emit the instruction. */
9019 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9020 if (reload_in_progress)
9022 /* Reload doesn't know about the flags register, and doesn't know that
9023 it doesn't want to clobber it. We can only do this with PLUS. */
9024 gcc_assert (code == PLUS);
9029 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9030 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9033 /* Fix up the destination if needed. */
9034 if (dst != operands[0])
9035 emit_move_insn (operands[0], dst);
9038 /* Return TRUE or FALSE depending on whether the binary operator meets the
9039 appropriate constraints. */
9042 ix86_binary_operator_ok (enum rtx_code code,
9043 enum machine_mode mode ATTRIBUTE_UNUSED,
9046 /* Both source operands cannot be in memory. */
9047 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9049 /* If the operation is not commutable, source 1 cannot be a constant. */
9050 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9052 /* If the destination is memory, we must have a matching source operand. */
9053 if (GET_CODE (operands[0]) == MEM
9054 && ! (rtx_equal_p (operands[0], operands[1])
9055 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9056 && rtx_equal_p (operands[0], operands[2]))))
9058 /* If the operation is not commutable and the source 1 is memory, we must
9059 have a matching destination. */
9060 if (GET_CODE (operands[1]) == MEM
9061 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9062 && ! rtx_equal_p (operands[0], operands[1]))
9067 /* Attempt to expand a unary operator. Make the expansion closer to the
9068 actual machine, then just general_operand, which will allow 2 separate
9069 memory references (one output, one input) in a single insn. */
9072 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9075 int matching_memory;
9076 rtx src, dst, op, clob;
9081 /* If the destination is memory, and we do not have matching source
9082 operands, do things in registers. */
9083 matching_memory = 0;
9086 if (rtx_equal_p (dst, src))
9087 matching_memory = 1;
9089 dst = gen_reg_rtx (mode);
9092 /* When source operand is memory, destination must match. */
9093 if (MEM_P (src) && !matching_memory)
9094 src = force_reg (mode, src);
9096 /* Emit the instruction. */
9098 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9099 if (reload_in_progress || code == NOT)
9101 /* Reload doesn't know about the flags register, and doesn't know that
9102 it doesn't want to clobber it. */
9103 gcc_assert (code == NOT);
9108 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9109 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9112 /* Fix up the destination if needed. */
9113 if (dst != operands[0])
9114 emit_move_insn (operands[0], dst);
9117 /* Return TRUE or FALSE depending on whether the unary operator meets the
9118 appropriate constraints. */
9121 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9122 enum machine_mode mode ATTRIBUTE_UNUSED,
9123 rtx operands[2] ATTRIBUTE_UNUSED)
9125 /* If one of operands is memory, source and destination must match. */
9126 if ((GET_CODE (operands[0]) == MEM
9127 || GET_CODE (operands[1]) == MEM)
9128 && ! rtx_equal_p (operands[0], operands[1]))
9133 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9134 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9135 true, then replicate the mask for all elements of the vector register.
9136 If INVERT is true, then create a mask excluding the sign bit. */
9139 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9141 enum machine_mode vec_mode;
9142 HOST_WIDE_INT hi, lo;
9147 /* Find the sign bit, sign extended to 2*HWI. */
9149 lo = 0x80000000, hi = lo < 0;
9150 else if (HOST_BITS_PER_WIDE_INT >= 64)
9151 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9153 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9158 /* Force this value into the low part of a fp vector constant. */
9159 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9160 mask = gen_lowpart (mode, mask);
9165 v = gen_rtvec (4, mask, mask, mask, mask);
9167 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9168 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9169 vec_mode = V4SFmode;
9174 v = gen_rtvec (2, mask, mask);
9176 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9177 vec_mode = V2DFmode;
9180 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9183 /* Generate code for floating point ABS or NEG. */
9186 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9189 rtx mask, set, use, clob, dst, src;
9190 bool matching_memory;
9191 bool use_sse = false;
9192 bool vector_mode = VECTOR_MODE_P (mode);
9193 enum machine_mode elt_mode = mode;
9197 elt_mode = GET_MODE_INNER (mode);
9200 else if (TARGET_SSE_MATH)
9201 use_sse = SSE_FLOAT_MODE_P (mode);
9203 /* NEG and ABS performed with SSE use bitwise mask operations.
9204 Create the appropriate mask now. */
9206 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9209 /* When not using SSE, we don't use the mask, but prefer to keep the
9210 same general form of the insn pattern to reduce duplication when
9211 it comes time to split. */
9218 /* If the destination is memory, and we don't have matching source
9219 operands, do things in registers. */
9220 matching_memory = false;
9223 if (rtx_equal_p (dst, src))
9224 matching_memory = true;
9226 dst = gen_reg_rtx (mode);
9228 if (MEM_P (src) && !matching_memory)
9229 src = force_reg (mode, src);
9233 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9234 set = gen_rtx_SET (VOIDmode, dst, set);
9239 set = gen_rtx_fmt_e (code, mode, src);
9240 set = gen_rtx_SET (VOIDmode, dst, set);
9241 use = gen_rtx_USE (VOIDmode, mask);
9242 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9243 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
9246 if (dst != operands[0])
9247 emit_move_insn (operands[0], dst);
9250 /* Expand a copysign operation. Special case operand 0 being a constant. */
9253 ix86_expand_copysign (rtx operands[])
9255 enum machine_mode mode, vmode;
9256 rtx dest, op0, op1, mask, nmask;
9262 mode = GET_MODE (dest);
9263 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9265 if (GET_CODE (op0) == CONST_DOUBLE)
9269 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9270 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9272 if (op0 == CONST0_RTX (mode))
9273 op0 = CONST0_RTX (vmode);
9277 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9278 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9280 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9281 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9284 mask = ix86_build_signbit_mask (mode, 0, 0);
9287 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9289 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9293 nmask = ix86_build_signbit_mask (mode, 0, 1);
9294 mask = ix86_build_signbit_mask (mode, 0, 0);
9297 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9299 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9303 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9304 be a constant, and so has already been expanded into a vector constant. */
9307 ix86_split_copysign_const (rtx operands[])
9309 enum machine_mode mode, vmode;
9310 rtx dest, op0, op1, mask, x;
9317 mode = GET_MODE (dest);
9318 vmode = GET_MODE (mask);
9320 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9321 x = gen_rtx_AND (vmode, dest, mask);
9322 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9324 if (op0 != CONST0_RTX (vmode))
9326 x = gen_rtx_IOR (vmode, dest, op0);
9327 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9331 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9332 so we have to do two masks. */
9335 ix86_split_copysign_var (rtx operands[])
9337 enum machine_mode mode, vmode;
9338 rtx dest, scratch, op0, op1, mask, nmask, x;
9341 scratch = operands[1];
9344 nmask = operands[4];
9347 mode = GET_MODE (dest);
9348 vmode = GET_MODE (mask);
9350 if (rtx_equal_p (op0, op1))
9352 /* Shouldn't happen often (it's useless, obviously), but when it does
9353 we'd generate incorrect code if we continue below. */
9354 emit_move_insn (dest, op0);
9358 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9360 gcc_assert (REGNO (op1) == REGNO (scratch));
9362 x = gen_rtx_AND (vmode, scratch, mask);
9363 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9366 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9367 x = gen_rtx_NOT (vmode, dest);
9368 x = gen_rtx_AND (vmode, x, op0);
9369 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9373 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9375 x = gen_rtx_AND (vmode, scratch, mask);
9377 else /* alternative 2,4 */
9379 gcc_assert (REGNO (mask) == REGNO (scratch));
9380 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9381 x = gen_rtx_AND (vmode, scratch, op1);
9383 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9385 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9387 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9388 x = gen_rtx_AND (vmode, dest, nmask);
9390 else /* alternative 3,4 */
9392 gcc_assert (REGNO (nmask) == REGNO (dest));
9394 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9395 x = gen_rtx_AND (vmode, dest, op0);
9397 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9400 x = gen_rtx_IOR (vmode, dest, scratch);
9401 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9404 /* Return TRUE or FALSE depending on whether the first SET in INSN
9405 has source and destination with matching CC modes, and that the
9406 CC mode is at least as constrained as REQ_MODE. */
9409 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9412 enum machine_mode set_mode;
9414 set = PATTERN (insn);
9415 if (GET_CODE (set) == PARALLEL)
9416 set = XVECEXP (set, 0, 0);
9417 gcc_assert (GET_CODE (set) == SET);
9418 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9420 set_mode = GET_MODE (SET_DEST (set));
9424 if (req_mode != CCNOmode
9425 && (req_mode != CCmode
9426 || XEXP (SET_SRC (set), 1) != const0_rtx))
9430 if (req_mode == CCGCmode)
9434 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9438 if (req_mode == CCZmode)
9448 return (GET_MODE (SET_SRC (set)) == set_mode);
9451 /* Generate insn patterns to do an integer compare of OPERANDS. */
9454 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9456 enum machine_mode cmpmode;
9459 cmpmode = SELECT_CC_MODE (code, op0, op1);
9460 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9462 /* This is very simple, but making the interface the same as in the
9463 FP case makes the rest of the code easier. */
9464 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9465 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9467 /* Return the test that should be put into the flags user, i.e.
9468 the bcc, scc, or cmov instruction. */
9469 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9472 /* Figure out whether to use ordered or unordered fp comparisons.
9473 Return the appropriate mode to use. */
9476 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9478 /* ??? In order to make all comparisons reversible, we do all comparisons
9479 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9480 all forms trapping and nontrapping comparisons, we can make inequality
9481 comparisons trapping again, since it results in better code when using
9482 FCOM based compares. */
9483 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9487 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9489 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9490 return ix86_fp_compare_mode (code);
9493 /* Only zero flag is needed. */
9495 case NE: /* ZF!=0 */
9497 /* Codes needing carry flag. */
9498 case GEU: /* CF=0 */
9499 case GTU: /* CF=0 & ZF=0 */
9500 case LTU: /* CF=1 */
9501 case LEU: /* CF=1 | ZF=1 */
9503 /* Codes possibly doable only with sign flag when
9504 comparing against zero. */
9505 case GE: /* SF=OF or SF=0 */
9506 case LT: /* SF<>OF or SF=1 */
9507 if (op1 == const0_rtx)
9510 /* For other cases Carry flag is not required. */
9512 /* Codes doable only with sign flag when comparing
9513 against zero, but we miss jump instruction for it
9514 so we need to use relational tests against overflow
9515 that thus needs to be zero. */
9516 case GT: /* ZF=0 & SF=OF */
9517 case LE: /* ZF=1 | SF<>OF */
9518 if (op1 == const0_rtx)
9522 /* strcmp pattern do (use flags) and combine may ask us for proper
9531 /* Return the fixed registers used for condition codes. */
9534 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9541 /* If two condition code modes are compatible, return a condition code
9542 mode which is compatible with both. Otherwise, return
9545 static enum machine_mode
9546 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9551 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9554 if ((m1 == CCGCmode && m2 == CCGOCmode)
9555 || (m1 == CCGOCmode && m2 == CCGCmode))
9583 /* These are only compatible with themselves, which we already
9589 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9592 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9594 enum rtx_code swapped_code = swap_condition (code);
9595 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9596 || (ix86_fp_comparison_cost (swapped_code)
9597 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9600 /* Swap, force into registers, or otherwise massage the two operands
9601 to a fp comparison. The operands are updated in place; the new
9602 comparison code is returned. */
9604 static enum rtx_code
9605 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9607 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9608 rtx op0 = *pop0, op1 = *pop1;
9609 enum machine_mode op_mode = GET_MODE (op0);
9610 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9612 /* All of the unordered compare instructions only work on registers.
9613 The same is true of the fcomi compare instructions. The XFmode
9614 compare instructions require registers except when comparing
9615 against zero or when converting operand 1 from fixed point to
9619 && (fpcmp_mode == CCFPUmode
9620 || (op_mode == XFmode
9621 && ! (standard_80387_constant_p (op0) == 1
9622 || standard_80387_constant_p (op1) == 1)
9623 && GET_CODE (op1) != FLOAT)
9624 || ix86_use_fcomi_compare (code)))
9626 op0 = force_reg (op_mode, op0);
9627 op1 = force_reg (op_mode, op1);
9631 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9632 things around if they appear profitable, otherwise force op0
9635 if (standard_80387_constant_p (op0) == 0
9636 || (GET_CODE (op0) == MEM
9637 && ! (standard_80387_constant_p (op1) == 0
9638 || GET_CODE (op1) == MEM)))
9641 tmp = op0, op0 = op1, op1 = tmp;
9642 code = swap_condition (code);
9645 if (GET_CODE (op0) != REG)
9646 op0 = force_reg (op_mode, op0);
9648 if (CONSTANT_P (op1))
9650 int tmp = standard_80387_constant_p (op1);
9652 op1 = validize_mem (force_const_mem (op_mode, op1));
9656 op1 = force_reg (op_mode, op1);
9659 op1 = force_reg (op_mode, op1);
9663 /* Try to rearrange the comparison to make it cheaper. */
9664 if (ix86_fp_comparison_cost (code)
9665 > ix86_fp_comparison_cost (swap_condition (code))
9666 && (GET_CODE (op1) == REG || !no_new_pseudos))
9669 tmp = op0, op0 = op1, op1 = tmp;
9670 code = swap_condition (code);
9671 if (GET_CODE (op0) != REG)
9672 op0 = force_reg (op_mode, op0);
9680 /* Convert comparison codes we use to represent FP comparison to integer
9681 code that will result in proper branch. Return UNKNOWN if no such code
9685 ix86_fp_compare_code_to_integer (enum rtx_code code)
9714 /* Split comparison code CODE into comparisons we can do using branch
9715 instructions. BYPASS_CODE is comparison code for branch that will
9716 branch around FIRST_CODE and SECOND_CODE. If some of branches
9717 is not required, set value to UNKNOWN.
9718 We never require more than two branches. */
9721 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9722 enum rtx_code *first_code,
9723 enum rtx_code *second_code)
9726 *bypass_code = UNKNOWN;
9727 *second_code = UNKNOWN;
9729 /* The fcomi comparison sets flags as follows:
9739 case GT: /* GTU - CF=0 & ZF=0 */
9740 case GE: /* GEU - CF=0 */
9741 case ORDERED: /* PF=0 */
9742 case UNORDERED: /* PF=1 */
9743 case UNEQ: /* EQ - ZF=1 */
9744 case UNLT: /* LTU - CF=1 */
9745 case UNLE: /* LEU - CF=1 | ZF=1 */
9746 case LTGT: /* EQ - ZF=0 */
9748 case LT: /* LTU - CF=1 - fails on unordered */
9750 *bypass_code = UNORDERED;
9752 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9754 *bypass_code = UNORDERED;
9756 case EQ: /* EQ - ZF=1 - fails on unordered */
9758 *bypass_code = UNORDERED;
9760 case NE: /* NE - ZF=0 - fails on unordered */
9762 *second_code = UNORDERED;
9764 case UNGE: /* GEU - CF=0 - fails on unordered */
9766 *second_code = UNORDERED;
9768 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9770 *second_code = UNORDERED;
9775 if (!TARGET_IEEE_FP)
9777 *second_code = UNKNOWN;
9778 *bypass_code = UNKNOWN;
9782 /* Return cost of comparison done fcom + arithmetics operations on AX.
9783 All following functions do use number of instructions as a cost metrics.
9784 In future this should be tweaked to compute bytes for optimize_size and
9785 take into account performance of various instructions on various CPUs. */
9787 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9789 if (!TARGET_IEEE_FP)
9791 /* The cost of code output by ix86_expand_fp_compare. */
9819 /* Return cost of comparison done using fcomi operation.
9820 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9822 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9824 enum rtx_code bypass_code, first_code, second_code;
9825 /* Return arbitrarily high cost when instruction is not supported - this
9826 prevents gcc from using it. */
9829 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9830 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9833 /* Return cost of comparison done using sahf operation.
9834 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9836 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9838 enum rtx_code bypass_code, first_code, second_code;
9839 /* Return arbitrarily high cost when instruction is not preferred - this
9840 avoids gcc from using it. */
9841 if (!TARGET_USE_SAHF && !optimize_size)
9843 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9844 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9847 /* Compute cost of the comparison done using any method.
9848 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9850 ix86_fp_comparison_cost (enum rtx_code code)
9852 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9855 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9856 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9858 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9859 if (min > sahf_cost)
9861 if (min > fcomi_cost)
9866 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9869 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9870 rtx *second_test, rtx *bypass_test)
9872 enum machine_mode fpcmp_mode, intcmp_mode;
9874 int cost = ix86_fp_comparison_cost (code);
9875 enum rtx_code bypass_code, first_code, second_code;
9877 fpcmp_mode = ix86_fp_compare_mode (code);
9878 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9881 *second_test = NULL_RTX;
9883 *bypass_test = NULL_RTX;
9885 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9887 /* Do fcomi/sahf based test when profitable. */
9888 if ((bypass_code == UNKNOWN || bypass_test)
9889 && (second_code == UNKNOWN || second_test)
9890 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9894 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9895 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9901 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9902 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9904 scratch = gen_reg_rtx (HImode);
9905 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9906 emit_insn (gen_x86_sahf_1 (scratch));
9909 /* The FP codes work out to act like unsigned. */
9910 intcmp_mode = fpcmp_mode;
9912 if (bypass_code != UNKNOWN)
9913 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9914 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9916 if (second_code != UNKNOWN)
9917 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9918 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9923 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9924 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9925 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9927 scratch = gen_reg_rtx (HImode);
9928 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9930 /* In the unordered case, we have to check C2 for NaN's, which
9931 doesn't happen to work out to anything nice combination-wise.
9932 So do some bit twiddling on the value we've got in AH to come
9933 up with an appropriate set of condition codes. */
9935 intcmp_mode = CCNOmode;
9940 if (code == GT || !TARGET_IEEE_FP)
9942 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9947 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9948 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9949 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9950 intcmp_mode = CCmode;
9956 if (code == LT && TARGET_IEEE_FP)
9958 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9959 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9960 intcmp_mode = CCmode;
9965 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9971 if (code == GE || !TARGET_IEEE_FP)
9973 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9978 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9979 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9986 if (code == LE && TARGET_IEEE_FP)
9988 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9989 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9990 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9991 intcmp_mode = CCmode;
9996 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10002 if (code == EQ && TARGET_IEEE_FP)
10004 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10005 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10006 intcmp_mode = CCmode;
10011 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10018 if (code == NE && TARGET_IEEE_FP)
10020 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10021 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10027 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10033 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10037 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10042 gcc_unreachable ();
10046 /* Return the test that should be put into the flags user, i.e.
10047 the bcc, scc, or cmov instruction. */
10048 return gen_rtx_fmt_ee (code, VOIDmode,
10049 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10054 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10057 op0 = ix86_compare_op0;
10058 op1 = ix86_compare_op1;
10061 *second_test = NULL_RTX;
10063 *bypass_test = NULL_RTX;
10065 if (ix86_compare_emitted)
10067 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10068 ix86_compare_emitted = NULL_RTX;
10070 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10071 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10072 second_test, bypass_test);
10074 ret = ix86_expand_int_compare (code, op0, op1);
10079 /* Return true if the CODE will result in nontrivial jump sequence. */
10081 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10083 enum rtx_code bypass_code, first_code, second_code;
10086 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10087 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10091 ix86_expand_branch (enum rtx_code code, rtx label)
10095 switch (GET_MODE (ix86_compare_op0))
10101 tmp = ix86_expand_compare (code, NULL, NULL);
10102 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10103 gen_rtx_LABEL_REF (VOIDmode, label),
10105 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10114 enum rtx_code bypass_code, first_code, second_code;
10116 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10117 &ix86_compare_op1);
10119 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10121 /* Check whether we will use the natural sequence with one jump. If
10122 so, we can expand jump early. Otherwise delay expansion by
10123 creating compound insn to not confuse optimizers. */
10124 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10127 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10128 gen_rtx_LABEL_REF (VOIDmode, label),
10129 pc_rtx, NULL_RTX, NULL_RTX);
10133 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10134 ix86_compare_op0, ix86_compare_op1);
10135 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10136 gen_rtx_LABEL_REF (VOIDmode, label),
10138 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10140 use_fcomi = ix86_use_fcomi_compare (code);
10141 vec = rtvec_alloc (3 + !use_fcomi);
10142 RTVEC_ELT (vec, 0) = tmp;
10144 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10146 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10149 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10151 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10160 /* Expand DImode branch into multiple compare+branch. */
10162 rtx lo[2], hi[2], label2;
10163 enum rtx_code code1, code2, code3;
10164 enum machine_mode submode;
10166 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10168 tmp = ix86_compare_op0;
10169 ix86_compare_op0 = ix86_compare_op1;
10170 ix86_compare_op1 = tmp;
10171 code = swap_condition (code);
10173 if (GET_MODE (ix86_compare_op0) == DImode)
10175 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10176 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10181 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10182 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10186 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10187 avoid two branches. This costs one extra insn, so disable when
10188 optimizing for size. */
10190 if ((code == EQ || code == NE)
10192 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10197 if (hi[1] != const0_rtx)
10198 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10199 NULL_RTX, 0, OPTAB_WIDEN);
10202 if (lo[1] != const0_rtx)
10203 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10204 NULL_RTX, 0, OPTAB_WIDEN);
10206 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10207 NULL_RTX, 0, OPTAB_WIDEN);
10209 ix86_compare_op0 = tmp;
10210 ix86_compare_op1 = const0_rtx;
10211 ix86_expand_branch (code, label);
10215 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10216 op1 is a constant and the low word is zero, then we can just
10217 examine the high word. */
10219 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10222 case LT: case LTU: case GE: case GEU:
10223 ix86_compare_op0 = hi[0];
10224 ix86_compare_op1 = hi[1];
10225 ix86_expand_branch (code, label);
10231 /* Otherwise, we need two or three jumps. */
10233 label2 = gen_label_rtx ();
10236 code2 = swap_condition (code);
10237 code3 = unsigned_condition (code);
10241 case LT: case GT: case LTU: case GTU:
10244 case LE: code1 = LT; code2 = GT; break;
10245 case GE: code1 = GT; code2 = LT; break;
10246 case LEU: code1 = LTU; code2 = GTU; break;
10247 case GEU: code1 = GTU; code2 = LTU; break;
10249 case EQ: code1 = UNKNOWN; code2 = NE; break;
10250 case NE: code2 = UNKNOWN; break;
10253 gcc_unreachable ();
10258 * if (hi(a) < hi(b)) goto true;
10259 * if (hi(a) > hi(b)) goto false;
10260 * if (lo(a) < lo(b)) goto true;
10264 ix86_compare_op0 = hi[0];
10265 ix86_compare_op1 = hi[1];
10267 if (code1 != UNKNOWN)
10268 ix86_expand_branch (code1, label);
10269 if (code2 != UNKNOWN)
10270 ix86_expand_branch (code2, label2);
10272 ix86_compare_op0 = lo[0];
10273 ix86_compare_op1 = lo[1];
10274 ix86_expand_branch (code3, label);
10276 if (code2 != UNKNOWN)
10277 emit_label (label2);
10282 gcc_unreachable ();
10286 /* Split branch based on floating point condition. */
10288 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10289 rtx target1, rtx target2, rtx tmp, rtx pushed)
10291 rtx second, bypass;
10292 rtx label = NULL_RTX;
10294 int bypass_probability = -1, second_probability = -1, probability = -1;
10297 if (target2 != pc_rtx)
10300 code = reverse_condition_maybe_unordered (code);
10305 condition = ix86_expand_fp_compare (code, op1, op2,
10306 tmp, &second, &bypass);
10308 /* Remove pushed operand from stack. */
10310 ix86_free_from_memory (GET_MODE (pushed));
10312 if (split_branch_probability >= 0)
10314 /* Distribute the probabilities across the jumps.
10315 Assume the BYPASS and SECOND to be always test
10317 probability = split_branch_probability;
10319 /* Value of 1 is low enough to make no need for probability
10320 to be updated. Later we may run some experiments and see
10321 if unordered values are more frequent in practice. */
10323 bypass_probability = 1;
10325 second_probability = 1;
10327 if (bypass != NULL_RTX)
10329 label = gen_label_rtx ();
10330 i = emit_jump_insn (gen_rtx_SET
10332 gen_rtx_IF_THEN_ELSE (VOIDmode,
10334 gen_rtx_LABEL_REF (VOIDmode,
10337 if (bypass_probability >= 0)
10339 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10340 GEN_INT (bypass_probability),
10343 i = emit_jump_insn (gen_rtx_SET
10345 gen_rtx_IF_THEN_ELSE (VOIDmode,
10346 condition, target1, target2)));
10347 if (probability >= 0)
10349 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10350 GEN_INT (probability),
10352 if (second != NULL_RTX)
10354 i = emit_jump_insn (gen_rtx_SET
10356 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10358 if (second_probability >= 0)
10360 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10361 GEN_INT (second_probability),
10364 if (label != NULL_RTX)
10365 emit_label (label);
10369 ix86_expand_setcc (enum rtx_code code, rtx dest)
10371 rtx ret, tmp, tmpreg, equiv;
10372 rtx second_test, bypass_test;
10374 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10375 return 0; /* FAIL */
10377 gcc_assert (GET_MODE (dest) == QImode);
10379 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10380 PUT_MODE (ret, QImode);
10385 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10386 if (bypass_test || second_test)
10388 rtx test = second_test;
10390 rtx tmp2 = gen_reg_rtx (QImode);
10393 gcc_assert (!second_test);
10394 test = bypass_test;
10396 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10398 PUT_MODE (test, QImode);
10399 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10402 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10404 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10407 /* Attach a REG_EQUAL note describing the comparison result. */
10408 if (ix86_compare_op0 && ix86_compare_op1)
10410 equiv = simplify_gen_relational (code, QImode,
10411 GET_MODE (ix86_compare_op0),
10412 ix86_compare_op0, ix86_compare_op1);
10413 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10416 return 1; /* DONE */
10419 /* Expand comparison setting or clearing carry flag. Return true when
10420 successful and set pop for the operation. */
10422 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10424 enum machine_mode mode =
10425 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10427 /* Do not handle DImode compares that go trought special path. Also we can't
10428 deal with FP compares yet. This is possible to add. */
10429 if (mode == (TARGET_64BIT ? TImode : DImode))
10431 if (FLOAT_MODE_P (mode))
10433 rtx second_test = NULL, bypass_test = NULL;
10434 rtx compare_op, compare_seq;
10436 /* Shortcut: following common codes never translate into carry flag compares. */
10437 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10438 || code == ORDERED || code == UNORDERED)
10441 /* These comparisons require zero flag; swap operands so they won't. */
10442 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10443 && !TARGET_IEEE_FP)
10448 code = swap_condition (code);
10451 /* Try to expand the comparison and verify that we end up with carry flag
10452 based comparison. This is fails to be true only when we decide to expand
10453 comparison using arithmetic that is not too common scenario. */
10455 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10456 &second_test, &bypass_test);
10457 compare_seq = get_insns ();
10460 if (second_test || bypass_test)
10462 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10463 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10464 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10466 code = GET_CODE (compare_op);
10467 if (code != LTU && code != GEU)
10469 emit_insn (compare_seq);
10473 if (!INTEGRAL_MODE_P (mode))
10481 /* Convert a==0 into (unsigned)a<1. */
10484 if (op1 != const0_rtx)
10487 code = (code == EQ ? LTU : GEU);
10490 /* Convert a>b into b<a or a>=b-1. */
10493 if (GET_CODE (op1) == CONST_INT)
10495 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10496 /* Bail out on overflow. We still can swap operands but that
10497 would force loading of the constant into register. */
10498 if (op1 == const0_rtx
10499 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10501 code = (code == GTU ? GEU : LTU);
10508 code = (code == GTU ? LTU : GEU);
10512 /* Convert a>=0 into (unsigned)a<0x80000000. */
10515 if (mode == DImode || op1 != const0_rtx)
10517 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10518 code = (code == LT ? GEU : LTU);
10522 if (mode == DImode || op1 != constm1_rtx)
10524 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10525 code = (code == LE ? GEU : LTU);
10531 /* Swapping operands may cause constant to appear as first operand. */
10532 if (!nonimmediate_operand (op0, VOIDmode))
10534 if (no_new_pseudos)
10536 op0 = force_reg (mode, op0);
10538 ix86_compare_op0 = op0;
10539 ix86_compare_op1 = op1;
10540 *pop = ix86_expand_compare (code, NULL, NULL);
10541 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10546 ix86_expand_int_movcc (rtx operands[])
10548 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10549 rtx compare_seq, compare_op;
10550 rtx second_test, bypass_test;
10551 enum machine_mode mode = GET_MODE (operands[0]);
10552 bool sign_bit_compare_p = false;;
10555 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10556 compare_seq = get_insns ();
10559 compare_code = GET_CODE (compare_op);
10561 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10562 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10563 sign_bit_compare_p = true;
10565 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10566 HImode insns, we'd be swallowed in word prefix ops. */
10568 if ((mode != HImode || TARGET_FAST_PREFIX)
10569 && (mode != (TARGET_64BIT ? TImode : DImode))
10570 && GET_CODE (operands[2]) == CONST_INT
10571 && GET_CODE (operands[3]) == CONST_INT)
10573 rtx out = operands[0];
10574 HOST_WIDE_INT ct = INTVAL (operands[2]);
10575 HOST_WIDE_INT cf = INTVAL (operands[3]);
10576 HOST_WIDE_INT diff;
10579 /* Sign bit compares are better done using shifts than we do by using
10581 if (sign_bit_compare_p
10582 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10583 ix86_compare_op1, &compare_op))
10585 /* Detect overlap between destination and compare sources. */
10588 if (!sign_bit_compare_p)
10590 bool fpcmp = false;
10592 compare_code = GET_CODE (compare_op);
10594 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10595 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10598 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10601 /* To simplify rest of code, restrict to the GEU case. */
10602 if (compare_code == LTU)
10604 HOST_WIDE_INT tmp = ct;
10607 compare_code = reverse_condition (compare_code);
10608 code = reverse_condition (code);
10613 PUT_CODE (compare_op,
10614 reverse_condition_maybe_unordered
10615 (GET_CODE (compare_op)));
10617 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10621 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10622 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10623 tmp = gen_reg_rtx (mode);
10625 if (mode == DImode)
10626 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10628 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10632 if (code == GT || code == GE)
10633 code = reverse_condition (code);
10636 HOST_WIDE_INT tmp = ct;
10641 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10642 ix86_compare_op1, VOIDmode, 0, -1);
10655 tmp = expand_simple_binop (mode, PLUS,
10657 copy_rtx (tmp), 1, OPTAB_DIRECT);
10668 tmp = expand_simple_binop (mode, IOR,
10670 copy_rtx (tmp), 1, OPTAB_DIRECT);
10672 else if (diff == -1 && ct)
10682 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10684 tmp = expand_simple_binop (mode, PLUS,
10685 copy_rtx (tmp), GEN_INT (cf),
10686 copy_rtx (tmp), 1, OPTAB_DIRECT);
10694 * andl cf - ct, dest
10704 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10707 tmp = expand_simple_binop (mode, AND,
10709 gen_int_mode (cf - ct, mode),
10710 copy_rtx (tmp), 1, OPTAB_DIRECT);
10712 tmp = expand_simple_binop (mode, PLUS,
10713 copy_rtx (tmp), GEN_INT (ct),
10714 copy_rtx (tmp), 1, OPTAB_DIRECT);
10717 if (!rtx_equal_p (tmp, out))
10718 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10720 return 1; /* DONE */
10726 tmp = ct, ct = cf, cf = tmp;
10728 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10730 /* We may be reversing unordered compare to normal compare, that
10731 is not valid in general (we may convert non-trapping condition
10732 to trapping one), however on i386 we currently emit all
10733 comparisons unordered. */
10734 compare_code = reverse_condition_maybe_unordered (compare_code);
10735 code = reverse_condition_maybe_unordered (code);
10739 compare_code = reverse_condition (compare_code);
10740 code = reverse_condition (code);
10744 compare_code = UNKNOWN;
10745 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10746 && GET_CODE (ix86_compare_op1) == CONST_INT)
10748 if (ix86_compare_op1 == const0_rtx
10749 && (code == LT || code == GE))
10750 compare_code = code;
10751 else if (ix86_compare_op1 == constm1_rtx)
10755 else if (code == GT)
10760 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10761 if (compare_code != UNKNOWN
10762 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10763 && (cf == -1 || ct == -1))
10765 /* If lea code below could be used, only optimize
10766 if it results in a 2 insn sequence. */
10768 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10769 || diff == 3 || diff == 5 || diff == 9)
10770 || (compare_code == LT && ct == -1)
10771 || (compare_code == GE && cf == -1))
10774 * notl op1 (if necessary)
10782 code = reverse_condition (code);
10785 out = emit_store_flag (out, code, ix86_compare_op0,
10786 ix86_compare_op1, VOIDmode, 0, -1);
10788 out = expand_simple_binop (mode, IOR,
10790 out, 1, OPTAB_DIRECT);
10791 if (out != operands[0])
10792 emit_move_insn (operands[0], out);
10794 return 1; /* DONE */
10799 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10800 || diff == 3 || diff == 5 || diff == 9)
10801 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10803 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
10809 * lea cf(dest*(ct-cf)),dest
10813 * This also catches the degenerate setcc-only case.
10819 out = emit_store_flag (out, code, ix86_compare_op0,
10820 ix86_compare_op1, VOIDmode, 0, 1);
10823 /* On x86_64 the lea instruction operates on Pmode, so we need
10824 to get arithmetics done in proper mode to match. */
10826 tmp = copy_rtx (out);
10830 out1 = copy_rtx (out);
10831 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10835 tmp = gen_rtx_PLUS (mode, tmp, out1);
10841 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10844 if (!rtx_equal_p (tmp, out))
10847 out = force_operand (tmp, copy_rtx (out));
10849 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10851 if (!rtx_equal_p (out, operands[0]))
10852 emit_move_insn (operands[0], copy_rtx (out));
10854 return 1; /* DONE */
10858 * General case: Jumpful:
10859 * xorl dest,dest cmpl op1, op2
10860 * cmpl op1, op2 movl ct, dest
10861 * setcc dest jcc 1f
10862 * decl dest movl cf, dest
10863 * andl (cf-ct),dest 1:
10866 * Size 20. Size 14.
10868 * This is reasonably steep, but branch mispredict costs are
10869 * high on modern cpus, so consider failing only if optimizing
10873 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10874 && BRANCH_COST >= 2)
10880 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10881 /* We may be reversing unordered compare to normal compare,
10882 that is not valid in general (we may convert non-trapping
10883 condition to trapping one), however on i386 we currently
10884 emit all comparisons unordered. */
10885 code = reverse_condition_maybe_unordered (code);
10888 code = reverse_condition (code);
10889 if (compare_code != UNKNOWN)
10890 compare_code = reverse_condition (compare_code);
10894 if (compare_code != UNKNOWN)
10896 /* notl op1 (if needed)
10901 For x < 0 (resp. x <= -1) there will be no notl,
10902 so if possible swap the constants to get rid of the
10904 True/false will be -1/0 while code below (store flag
10905 followed by decrement) is 0/-1, so the constants need
10906 to be exchanged once more. */
10908 if (compare_code == GE || !cf)
10910 code = reverse_condition (code);
10915 HOST_WIDE_INT tmp = cf;
10920 out = emit_store_flag (out, code, ix86_compare_op0,
10921 ix86_compare_op1, VOIDmode, 0, -1);
10925 out = emit_store_flag (out, code, ix86_compare_op0,
10926 ix86_compare_op1, VOIDmode, 0, 1);
10928 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10929 copy_rtx (out), 1, OPTAB_DIRECT);
10932 out = expand_simple_binop (mode, AND, copy_rtx (out),
10933 gen_int_mode (cf - ct, mode),
10934 copy_rtx (out), 1, OPTAB_DIRECT);
10936 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10937 copy_rtx (out), 1, OPTAB_DIRECT);
10938 if (!rtx_equal_p (out, operands[0]))
10939 emit_move_insn (operands[0], copy_rtx (out));
10941 return 1; /* DONE */
10945 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10947 /* Try a few things more with specific constants and a variable. */
10950 rtx var, orig_out, out, tmp;
10952 if (BRANCH_COST <= 2)
10953 return 0; /* FAIL */
10955 /* If one of the two operands is an interesting constant, load a
10956 constant with the above and mask it in with a logical operation. */
10958 if (GET_CODE (operands[2]) == CONST_INT)
10961 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10962 operands[3] = constm1_rtx, op = and_optab;
10963 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10964 operands[3] = const0_rtx, op = ior_optab;
10966 return 0; /* FAIL */
10968 else if (GET_CODE (operands[3]) == CONST_INT)
10971 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10972 operands[2] = constm1_rtx, op = and_optab;
10973 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10974 operands[2] = const0_rtx, op = ior_optab;
10976 return 0; /* FAIL */
10979 return 0; /* FAIL */
10981 orig_out = operands[0];
10982 tmp = gen_reg_rtx (mode);
10985 /* Recurse to get the constant loaded. */
10986 if (ix86_expand_int_movcc (operands) == 0)
10987 return 0; /* FAIL */
10989 /* Mask in the interesting variable. */
10990 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10992 if (!rtx_equal_p (out, orig_out))
10993 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10995 return 1; /* DONE */
10999 * For comparison with above,
11009 if (! nonimmediate_operand (operands[2], mode))
11010 operands[2] = force_reg (mode, operands[2]);
11011 if (! nonimmediate_operand (operands[3], mode))
11012 operands[3] = force_reg (mode, operands[3]);
11014 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11016 rtx tmp = gen_reg_rtx (mode);
11017 emit_move_insn (tmp, operands[3]);
11020 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11022 rtx tmp = gen_reg_rtx (mode);
11023 emit_move_insn (tmp, operands[2]);
11027 if (! register_operand (operands[2], VOIDmode)
11029 || ! register_operand (operands[3], VOIDmode)))
11030 operands[2] = force_reg (mode, operands[2]);
11033 && ! register_operand (operands[3], VOIDmode))
11034 operands[3] = force_reg (mode, operands[3]);
11036 emit_insn (compare_seq);
11037 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11038 gen_rtx_IF_THEN_ELSE (mode,
11039 compare_op, operands[2],
11042 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11043 gen_rtx_IF_THEN_ELSE (mode,
11045 copy_rtx (operands[3]),
11046 copy_rtx (operands[0]))));
11048 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11049 gen_rtx_IF_THEN_ELSE (mode,
11051 copy_rtx (operands[2]),
11052 copy_rtx (operands[0]))));
11054 return 1; /* DONE */
11057 /* Swap, force into registers, or otherwise massage the two operands
11058 to an sse comparison with a mask result. Thus we differ a bit from
11059 ix86_prepare_fp_compare_args which expects to produce a flags result.
11061 The DEST operand exists to help determine whether to commute commutative
11062 operators. The POP0/POP1 operands are updated in place. The new
11063 comparison code is returned, or UNKNOWN if not implementable. */
11065 static enum rtx_code
11066 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11067 rtx *pop0, rtx *pop1)
11075 /* We have no LTGT as an operator. We could implement it with
11076 NE & ORDERED, but this requires an extra temporary. It's
11077 not clear that it's worth it. */
11084 /* These are supported directly. */
11091 /* For commutative operators, try to canonicalize the destination
11092 operand to be first in the comparison - this helps reload to
11093 avoid extra moves. */
11094 if (!dest || !rtx_equal_p (dest, *pop1))
11102 /* These are not supported directly. Swap the comparison operands
11103 to transform into something that is supported. */
11107 code = swap_condition (code);
11111 gcc_unreachable ();
11117 /* Detect conditional moves that exactly match min/max operational
11118 semantics. Note that this is IEEE safe, as long as we don't
11119 interchange the operands.
11121 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11122 and TRUE if the operation is successful and instructions are emitted. */
11125 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11126 rtx cmp_op1, rtx if_true, rtx if_false)
11128 enum machine_mode mode;
11134 else if (code == UNGE)
11137 if_true = if_false;
11143 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11145 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11150 mode = GET_MODE (dest);
11152 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11153 but MODE may be a vector mode and thus not appropriate. */
11154 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11156 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11159 if_true = force_reg (mode, if_true);
11160 v = gen_rtvec (2, if_true, if_false);
11161 tmp = gen_rtx_UNSPEC (mode, v, u);
11165 code = is_min ? SMIN : SMAX;
11166 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11169 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11173 /* Expand an sse vector comparison. Return the register with the result. */
11176 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11177 rtx op_true, rtx op_false)
11179 enum machine_mode mode = GET_MODE (dest);
11182 cmp_op0 = force_reg (mode, cmp_op0);
11183 if (!nonimmediate_operand (cmp_op1, mode))
11184 cmp_op1 = force_reg (mode, cmp_op1);
11187 || reg_overlap_mentioned_p (dest, op_true)
11188 || reg_overlap_mentioned_p (dest, op_false))
11189 dest = gen_reg_rtx (mode);
11191 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11192 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11197 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11198 operations. This is used for both scalar and vector conditional moves. */
11201 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11203 enum machine_mode mode = GET_MODE (dest);
11206 if (op_false == CONST0_RTX (mode))
11208 op_true = force_reg (mode, op_true);
11209 x = gen_rtx_AND (mode, cmp, op_true);
11210 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11212 else if (op_true == CONST0_RTX (mode))
11214 op_false = force_reg (mode, op_false);
11215 x = gen_rtx_NOT (mode, cmp);
11216 x = gen_rtx_AND (mode, x, op_false);
11217 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11221 op_true = force_reg (mode, op_true);
11222 op_false = force_reg (mode, op_false);
11224 t2 = gen_reg_rtx (mode);
11226 t3 = gen_reg_rtx (mode);
11230 x = gen_rtx_AND (mode, op_true, cmp);
11231 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11233 x = gen_rtx_NOT (mode, cmp);
11234 x = gen_rtx_AND (mode, x, op_false);
11235 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11237 x = gen_rtx_IOR (mode, t3, t2);
11238 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11242 /* Expand a floating-point conditional move. Return true if successful. */
11245 ix86_expand_fp_movcc (rtx operands[])
11247 enum machine_mode mode = GET_MODE (operands[0]);
11248 enum rtx_code code = GET_CODE (operands[1]);
11249 rtx tmp, compare_op, second_test, bypass_test;
11251 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11253 enum machine_mode cmode;
11255 /* Since we've no cmove for sse registers, don't force bad register
11256 allocation just to gain access to it. Deny movcc when the
11257 comparison mode doesn't match the move mode. */
11258 cmode = GET_MODE (ix86_compare_op0);
11259 if (cmode == VOIDmode)
11260 cmode = GET_MODE (ix86_compare_op1);
11264 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11266 &ix86_compare_op1);
11267 if (code == UNKNOWN)
11270 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11271 ix86_compare_op1, operands[2],
11275 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11276 ix86_compare_op1, operands[2], operands[3]);
11277 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11281 /* The floating point conditional move instructions don't directly
11282 support conditions resulting from a signed integer comparison. */
11284 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11286 /* The floating point conditional move instructions don't directly
11287 support signed integer comparisons. */
11289 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11291 gcc_assert (!second_test && !bypass_test);
11292 tmp = gen_reg_rtx (QImode);
11293 ix86_expand_setcc (code, tmp);
11295 ix86_compare_op0 = tmp;
11296 ix86_compare_op1 = const0_rtx;
11297 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11299 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11301 tmp = gen_reg_rtx (mode);
11302 emit_move_insn (tmp, operands[3]);
11305 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11307 tmp = gen_reg_rtx (mode);
11308 emit_move_insn (tmp, operands[2]);
11312 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11313 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11314 operands[2], operands[3])));
11316 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11317 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11318 operands[3], operands[0])));
11320 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11321 gen_rtx_IF_THEN_ELSE (mode, second_test,
11322 operands[2], operands[0])));
11327 /* Expand a floating-point vector conditional move; a vcond operation
11328 rather than a movcc operation. */
11331 ix86_expand_fp_vcond (rtx operands[])
11333 enum rtx_code code = GET_CODE (operands[3]);
11336 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11337 &operands[4], &operands[5]);
11338 if (code == UNKNOWN)
11341 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11342 operands[5], operands[1], operands[2]))
11345 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11346 operands[1], operands[2]);
11347 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11351 /* Expand a signed integral vector conditional move. */
11354 ix86_expand_int_vcond (rtx operands[])
11356 enum machine_mode mode = GET_MODE (operands[0]);
11357 enum rtx_code code = GET_CODE (operands[3]);
11358 bool negate = false;
11361 cop0 = operands[4];
11362 cop1 = operands[5];
11364 /* Canonicalize the comparison to EQ, GT, GTU. */
11375 code = reverse_condition (code);
11381 code = reverse_condition (code);
11387 code = swap_condition (code);
11388 x = cop0, cop0 = cop1, cop1 = x;
11392 gcc_unreachable ();
11395 /* Unsigned parallel compare is not supported by the hardware. Play some
11396 tricks to turn this into a signed comparison against 0. */
11405 /* Perform a parallel modulo subtraction. */
11406 t1 = gen_reg_rtx (mode);
11407 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11409 /* Extract the original sign bit of op0. */
11410 mask = GEN_INT (-0x80000000);
11411 mask = gen_rtx_CONST_VECTOR (mode,
11412 gen_rtvec (4, mask, mask, mask, mask));
11413 mask = force_reg (mode, mask);
11414 t2 = gen_reg_rtx (mode);
11415 emit_insn (gen_andv4si3 (t2, cop0, mask));
11417 /* XOR it back into the result of the subtraction. This results
11418 in the sign bit set iff we saw unsigned underflow. */
11419 x = gen_reg_rtx (mode);
11420 emit_insn (gen_xorv4si3 (x, t1, t2));
11428 /* Perform a parallel unsigned saturating subtraction. */
11429 x = gen_reg_rtx (mode);
11430 emit_insn (gen_rtx_SET (VOIDmode, x,
11431 gen_rtx_US_MINUS (mode, cop0, cop1)));
11438 gcc_unreachable ();
11442 cop1 = CONST0_RTX (mode);
11445 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11446 operands[1+negate], operands[2-negate]);
11448 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11449 operands[2-negate]);
11453 /* Expand conditional increment or decrement using adb/sbb instructions.
11454 The default case using setcc followed by the conditional move can be
11455 done by generic code. */
11457 ix86_expand_int_addcc (rtx operands[])
11459 enum rtx_code code = GET_CODE (operands[1]);
11461 rtx val = const0_rtx;
11462 bool fpcmp = false;
11463 enum machine_mode mode = GET_MODE (operands[0]);
11465 if (operands[3] != const1_rtx
11466 && operands[3] != constm1_rtx)
11468 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11469 ix86_compare_op1, &compare_op))
11471 code = GET_CODE (compare_op);
11473 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11474 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11477 code = ix86_fp_compare_code_to_integer (code);
11484 PUT_CODE (compare_op,
11485 reverse_condition_maybe_unordered
11486 (GET_CODE (compare_op)));
11488 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11490 PUT_MODE (compare_op, mode);
11492 /* Construct either adc or sbb insn. */
11493 if ((code == LTU) == (operands[3] == constm1_rtx))
11495 switch (GET_MODE (operands[0]))
11498 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11501 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11504 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11507 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11510 gcc_unreachable ();
11515 switch (GET_MODE (operands[0]))
11518 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11521 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11524 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11527 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11530 gcc_unreachable ();
11533 return 1; /* DONE */
11537 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11538 works for floating pointer parameters and nonoffsetable memories.
11539 For pushes, it returns just stack offsets; the values will be saved
11540 in the right order. Maximally three parts are generated. */
11543 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11548 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11550 size = (GET_MODE_SIZE (mode) + 4) / 8;
11552 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11553 gcc_assert (size >= 2 && size <= 3);
11555 /* Optimize constant pool reference to immediates. This is used by fp
11556 moves, that force all constants to memory to allow combining. */
11557 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11559 rtx tmp = maybe_get_pool_constant (operand);
11564 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11566 /* The only non-offsetable memories we handle are pushes. */
11567 int ok = push_operand (operand, VOIDmode);
11571 operand = copy_rtx (operand);
11572 PUT_MODE (operand, Pmode);
11573 parts[0] = parts[1] = parts[2] = operand;
11577 if (GET_CODE (operand) == CONST_VECTOR)
11579 enum machine_mode imode = int_mode_for_mode (mode);
11580 /* Caution: if we looked through a constant pool memory above,
11581 the operand may actually have a different mode now. That's
11582 ok, since we want to pun this all the way back to an integer. */
11583 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11584 gcc_assert (operand != NULL);
11590 if (mode == DImode)
11591 split_di (&operand, 1, &parts[0], &parts[1]);
11594 if (REG_P (operand))
11596 gcc_assert (reload_completed);
11597 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11598 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11600 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11602 else if (offsettable_memref_p (operand))
11604 operand = adjust_address (operand, SImode, 0);
11605 parts[0] = operand;
11606 parts[1] = adjust_address (operand, SImode, 4);
11608 parts[2] = adjust_address (operand, SImode, 8);
11610 else if (GET_CODE (operand) == CONST_DOUBLE)
11615 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11619 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11620 parts[2] = gen_int_mode (l[2], SImode);
11623 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11626 gcc_unreachable ();
11628 parts[1] = gen_int_mode (l[1], SImode);
11629 parts[0] = gen_int_mode (l[0], SImode);
11632 gcc_unreachable ();
11637 if (mode == TImode)
11638 split_ti (&operand, 1, &parts[0], &parts[1]);
11639 if (mode == XFmode || mode == TFmode)
11641 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11642 if (REG_P (operand))
11644 gcc_assert (reload_completed);
11645 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11646 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11648 else if (offsettable_memref_p (operand))
11650 operand = adjust_address (operand, DImode, 0);
11651 parts[0] = operand;
11652 parts[1] = adjust_address (operand, upper_mode, 8);
11654 else if (GET_CODE (operand) == CONST_DOUBLE)
11659 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11660 real_to_target (l, &r, mode);
11662 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11663 if (HOST_BITS_PER_WIDE_INT >= 64)
11666 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11667 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11670 parts[0] = immed_double_const (l[0], l[1], DImode);
11672 if (upper_mode == SImode)
11673 parts[1] = gen_int_mode (l[2], SImode);
11674 else if (HOST_BITS_PER_WIDE_INT >= 64)
11677 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11678 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11681 parts[1] = immed_double_const (l[2], l[3], DImode);
11684 gcc_unreachable ();
11691 /* Emit insns to perform a move or push of DI, DF, and XF values.
11692 Return false when normal moves are needed; true when all required
11693 insns have been emitted. Operands 2-4 contain the input values
11694 int the correct order; operands 5-7 contain the output values. */
11697 ix86_split_long_move (rtx operands[])
11702 int collisions = 0;
11703 enum machine_mode mode = GET_MODE (operands[0]);
11705 /* The DFmode expanders may ask us to move double.
11706 For 64bit target this is single move. By hiding the fact
11707 here we simplify i386.md splitters. */
11708 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11710 /* Optimize constant pool reference to immediates. This is used by
11711 fp moves, that force all constants to memory to allow combining. */
11713 if (GET_CODE (operands[1]) == MEM
11714 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11715 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11716 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11717 if (push_operand (operands[0], VOIDmode))
11719 operands[0] = copy_rtx (operands[0]);
11720 PUT_MODE (operands[0], Pmode);
11723 operands[0] = gen_lowpart (DImode, operands[0]);
11724 operands[1] = gen_lowpart (DImode, operands[1]);
11725 emit_move_insn (operands[0], operands[1]);
11729 /* The only non-offsettable memory we handle is push. */
11730 if (push_operand (operands[0], VOIDmode))
11733 gcc_assert (GET_CODE (operands[0]) != MEM
11734 || offsettable_memref_p (operands[0]));
11736 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11737 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11739 /* When emitting push, take care for source operands on the stack. */
11740 if (push && GET_CODE (operands[1]) == MEM
11741 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11744 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11745 XEXP (part[1][2], 0));
11746 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11747 XEXP (part[1][1], 0));
11750 /* We need to do copy in the right order in case an address register
11751 of the source overlaps the destination. */
11752 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
11754 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
11756 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11759 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
11762 /* Collision in the middle part can be handled by reordering. */
11763 if (collisions == 1 && nparts == 3
11764 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11767 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
11768 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
11771 /* If there are more collisions, we can't handle it by reordering.
11772 Do an lea to the last part and use only one colliding move. */
11773 else if (collisions > 1)
11779 base = part[0][nparts - 1];
11781 /* Handle the case when the last part isn't valid for lea.
11782 Happens in 64-bit mode storing the 12-byte XFmode. */
11783 if (GET_MODE (base) != Pmode)
11784 base = gen_rtx_REG (Pmode, REGNO (base));
11786 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
11787 part[1][0] = replace_equiv_address (part[1][0], base);
11788 part[1][1] = replace_equiv_address (part[1][1],
11789 plus_constant (base, UNITS_PER_WORD));
11791 part[1][2] = replace_equiv_address (part[1][2],
11792 plus_constant (base, 8));
11802 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
11803 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
11804 emit_move_insn (part[0][2], part[1][2]);
11809 /* In 64bit mode we don't have 32bit push available. In case this is
11810 register, it is OK - we will just use larger counterpart. We also
11811 retype memory - these comes from attempt to avoid REX prefix on
11812 moving of second half of TFmode value. */
11813 if (GET_MODE (part[1][1]) == SImode)
11815 switch (GET_CODE (part[1][1]))
11818 part[1][1] = adjust_address (part[1][1], DImode, 0);
11822 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
11826 gcc_unreachable ();
11829 if (GET_MODE (part[1][0]) == SImode)
11830 part[1][0] = part[1][1];
11833 emit_move_insn (part[0][1], part[1][1]);
11834 emit_move_insn (part[0][0], part[1][0]);
11838 /* Choose correct order to not overwrite the source before it is copied. */
11839 if ((REG_P (part[0][0])
11840 && REG_P (part[1][1])
11841 && (REGNO (part[0][0]) == REGNO (part[1][1])
11843 && REGNO (part[0][0]) == REGNO (part[1][2]))))
11845 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
11849 operands[2] = part[0][2];
11850 operands[3] = part[0][1];
11851 operands[4] = part[0][0];
11852 operands[5] = part[1][2];
11853 operands[6] = part[1][1];
11854 operands[7] = part[1][0];
11858 operands[2] = part[0][1];
11859 operands[3] = part[0][0];
11860 operands[5] = part[1][1];
11861 operands[6] = part[1][0];
11868 operands[2] = part[0][0];
11869 operands[3] = part[0][1];
11870 operands[4] = part[0][2];
11871 operands[5] = part[1][0];
11872 operands[6] = part[1][1];
11873 operands[7] = part[1][2];
11877 operands[2] = part[0][0];
11878 operands[3] = part[0][1];
11879 operands[5] = part[1][0];
11880 operands[6] = part[1][1];
11884 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
11887 if (GET_CODE (operands[5]) == CONST_INT
11888 && operands[5] != const0_rtx
11889 && REG_P (operands[2]))
11891 if (GET_CODE (operands[6]) == CONST_INT
11892 && INTVAL (operands[6]) == INTVAL (operands[5]))
11893 operands[6] = operands[2];
11896 && GET_CODE (operands[7]) == CONST_INT
11897 && INTVAL (operands[7]) == INTVAL (operands[5]))
11898 operands[7] = operands[2];
11902 && GET_CODE (operands[6]) == CONST_INT
11903 && operands[6] != const0_rtx
11904 && REG_P (operands[3])
11905 && GET_CODE (operands[7]) == CONST_INT
11906 && INTVAL (operands[7]) == INTVAL (operands[6]))
11907 operands[7] = operands[3];
11910 emit_move_insn (operands[2], operands[5]);
11911 emit_move_insn (operands[3], operands[6]);
11913 emit_move_insn (operands[4], operands[7]);
11918 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
11919 left shift by a constant, either using a single shift or
11920 a sequence of add instructions. */
11923 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
11927 emit_insn ((mode == DImode
11929 : gen_adddi3) (operand, operand, operand));
11931 else if (!optimize_size
11932 && count * ix86_cost->add <= ix86_cost->shift_const)
11935 for (i=0; i<count; i++)
11937 emit_insn ((mode == DImode
11939 : gen_adddi3) (operand, operand, operand));
11943 emit_insn ((mode == DImode
11945 : gen_ashldi3) (operand, operand, GEN_INT (count)));
11949 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
11951 rtx low[2], high[2];
11953 const int single_width = mode == DImode ? 32 : 64;
11955 if (GET_CODE (operands[2]) == CONST_INT)
11957 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11958 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11960 if (count >= single_width)
11962 emit_move_insn (high[0], low[1]);
11963 emit_move_insn (low[0], const0_rtx);
11965 if (count > single_width)
11966 ix86_expand_ashl_const (high[0], count - single_width, mode);
11970 if (!rtx_equal_p (operands[0], operands[1]))
11971 emit_move_insn (operands[0], operands[1]);
11972 emit_insn ((mode == DImode
11974 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
11975 ix86_expand_ashl_const (low[0], count, mode);
11980 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11982 if (operands[1] == const1_rtx)
11984 /* Assuming we've chosen a QImode capable registers, then 1 << N
11985 can be done with two 32/64-bit shifts, no branches, no cmoves. */
11986 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
11988 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
11990 ix86_expand_clear (low[0]);
11991 ix86_expand_clear (high[0]);
11992 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
11994 d = gen_lowpart (QImode, low[0]);
11995 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11996 s = gen_rtx_EQ (QImode, flags, const0_rtx);
11997 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11999 d = gen_lowpart (QImode, high[0]);
12000 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12001 s = gen_rtx_NE (QImode, flags, const0_rtx);
12002 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12005 /* Otherwise, we can get the same results by manually performing
12006 a bit extract operation on bit 5/6, and then performing the two
12007 shifts. The two methods of getting 0/1 into low/high are exactly
12008 the same size. Avoiding the shift in the bit extract case helps
12009 pentium4 a bit; no one else seems to care much either way. */
12014 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12015 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12017 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12018 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12020 emit_insn ((mode == DImode
12022 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12023 emit_insn ((mode == DImode
12025 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12026 emit_move_insn (low[0], high[0]);
12027 emit_insn ((mode == DImode
12029 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12032 emit_insn ((mode == DImode
12034 : gen_ashldi3) (low[0], low[0], operands[2]));
12035 emit_insn ((mode == DImode
12037 : gen_ashldi3) (high[0], high[0], operands[2]));
12041 if (operands[1] == constm1_rtx)
12043 /* For -1 << N, we can avoid the shld instruction, because we
12044 know that we're shifting 0...31/63 ones into a -1. */
12045 emit_move_insn (low[0], constm1_rtx);
12047 emit_move_insn (high[0], low[0]);
12049 emit_move_insn (high[0], constm1_rtx);
12053 if (!rtx_equal_p (operands[0], operands[1]))
12054 emit_move_insn (operands[0], operands[1]);
12056 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12057 emit_insn ((mode == DImode
12059 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12062 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12064 if (TARGET_CMOVE && scratch)
12066 ix86_expand_clear (scratch);
12067 emit_insn ((mode == DImode
12068 ? gen_x86_shift_adj_1
12069 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12072 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12076 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12078 rtx low[2], high[2];
12080 const int single_width = mode == DImode ? 32 : 64;
12082 if (GET_CODE (operands[2]) == CONST_INT)
12084 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12085 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12087 if (count == single_width * 2 - 1)
12089 emit_move_insn (high[0], high[1]);
12090 emit_insn ((mode == DImode
12092 : gen_ashrdi3) (high[0], high[0],
12093 GEN_INT (single_width - 1)));
12094 emit_move_insn (low[0], high[0]);
12097 else if (count >= single_width)
12099 emit_move_insn (low[0], high[1]);
12100 emit_move_insn (high[0], low[0]);
12101 emit_insn ((mode == DImode
12103 : gen_ashrdi3) (high[0], high[0],
12104 GEN_INT (single_width - 1)));
12105 if (count > single_width)
12106 emit_insn ((mode == DImode
12108 : gen_ashrdi3) (low[0], low[0],
12109 GEN_INT (count - single_width)));
12113 if (!rtx_equal_p (operands[0], operands[1]))
12114 emit_move_insn (operands[0], operands[1]);
12115 emit_insn ((mode == DImode
12117 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12118 emit_insn ((mode == DImode
12120 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12125 if (!rtx_equal_p (operands[0], operands[1]))
12126 emit_move_insn (operands[0], operands[1]);
12128 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12130 emit_insn ((mode == DImode
12132 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12133 emit_insn ((mode == DImode
12135 : gen_ashrdi3) (high[0], high[0], operands[2]));
12137 if (TARGET_CMOVE && scratch)
12139 emit_move_insn (scratch, high[0]);
12140 emit_insn ((mode == DImode
12142 : gen_ashrdi3) (scratch, scratch,
12143 GEN_INT (single_width - 1)));
12144 emit_insn ((mode == DImode
12145 ? gen_x86_shift_adj_1
12146 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12150 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12155 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12157 rtx low[2], high[2];
12159 const int single_width = mode == DImode ? 32 : 64;
12161 if (GET_CODE (operands[2]) == CONST_INT)
12163 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12164 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12166 if (count >= single_width)
12168 emit_move_insn (low[0], high[1]);
12169 ix86_expand_clear (high[0]);
12171 if (count > single_width)
12172 emit_insn ((mode == DImode
12174 : gen_lshrdi3) (low[0], low[0],
12175 GEN_INT (count - single_width)));
12179 if (!rtx_equal_p (operands[0], operands[1]))
12180 emit_move_insn (operands[0], operands[1]);
12181 emit_insn ((mode == DImode
12183 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12184 emit_insn ((mode == DImode
12186 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12191 if (!rtx_equal_p (operands[0], operands[1]))
12192 emit_move_insn (operands[0], operands[1]);
12194 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12196 emit_insn ((mode == DImode
12198 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12199 emit_insn ((mode == DImode
12201 : gen_lshrdi3) (high[0], high[0], operands[2]));
12203 /* Heh. By reversing the arguments, we can reuse this pattern. */
12204 if (TARGET_CMOVE && scratch)
12206 ix86_expand_clear (scratch);
12207 emit_insn ((mode == DImode
12208 ? gen_x86_shift_adj_1
12209 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12213 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12217 /* Helper function for the string operations below. Dest VARIABLE whether
12218 it is aligned to VALUE bytes. If true, jump to the label. */
12220 ix86_expand_aligntest (rtx variable, int value)
12222 rtx label = gen_label_rtx ();
12223 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12224 if (GET_MODE (variable) == DImode)
12225 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12227 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12228 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12233 /* Adjust COUNTER by the VALUE. */
12235 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12237 if (GET_MODE (countreg) == DImode)
12238 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12240 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12243 /* Zero extend possibly SImode EXP to Pmode register. */
12245 ix86_zero_extend_to_Pmode (rtx exp)
12248 if (GET_MODE (exp) == VOIDmode)
12249 return force_reg (Pmode, exp);
12250 if (GET_MODE (exp) == Pmode)
12251 return copy_to_mode_reg (Pmode, exp);
12252 r = gen_reg_rtx (Pmode);
12253 emit_insn (gen_zero_extendsidi2 (r, exp));
12257 /* Expand string move (memcpy) operation. Use i386 string operations when
12258 profitable. expand_clrmem contains similar code. */
12260 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12262 rtx srcreg, destreg, countreg, srcexp, destexp;
12263 enum machine_mode counter_mode;
12264 HOST_WIDE_INT align = 0;
12265 unsigned HOST_WIDE_INT count = 0;
12267 if (GET_CODE (align_exp) == CONST_INT)
12268 align = INTVAL (align_exp);
12270 /* Can't use any of this if the user has appropriated esi or edi. */
12271 if (global_regs[4] || global_regs[5])
12274 /* This simple hack avoids all inlining code and simplifies code below. */
12275 if (!TARGET_ALIGN_STRINGOPS)
12278 if (GET_CODE (count_exp) == CONST_INT)
12280 count = INTVAL (count_exp);
12281 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12285 /* Figure out proper mode for counter. For 32bits it is always SImode,
12286 for 64bits use SImode when possible, otherwise DImode.
12287 Set count to number of bytes copied when known at compile time. */
12289 || GET_MODE (count_exp) == SImode
12290 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12291 counter_mode = SImode;
12293 counter_mode = DImode;
12295 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12297 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12298 if (destreg != XEXP (dst, 0))
12299 dst = replace_equiv_address_nv (dst, destreg);
12300 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12301 if (srcreg != XEXP (src, 0))
12302 src = replace_equiv_address_nv (src, srcreg);
12304 /* When optimizing for size emit simple rep ; movsb instruction for
12305 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12306 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12307 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12308 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12309 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12310 known to be zero or not. The rep; movsb sequence causes higher
12311 register pressure though, so take that into account. */
12313 if ((!optimize || optimize_size)
12318 || (count & 3) + count / 4 > 6))))
12320 emit_insn (gen_cld ());
12321 countreg = ix86_zero_extend_to_Pmode (count_exp);
12322 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12323 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12324 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12328 /* For constant aligned (or small unaligned) copies use rep movsl
12329 followed by code copying the rest. For PentiumPro ensure 8 byte
12330 alignment to allow rep movsl acceleration. */
12332 else if (count != 0
12334 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12335 || optimize_size || count < (unsigned int) 64))
12337 unsigned HOST_WIDE_INT offset = 0;
12338 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12339 rtx srcmem, dstmem;
12341 emit_insn (gen_cld ());
12342 if (count & ~(size - 1))
12344 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12346 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12348 while (offset < (count & ~(size - 1)))
12350 srcmem = adjust_automodify_address_nv (src, movs_mode,
12352 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12354 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12360 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12361 & (TARGET_64BIT ? -1 : 0x3fffffff));
12362 countreg = copy_to_mode_reg (counter_mode, countreg);
12363 countreg = ix86_zero_extend_to_Pmode (countreg);
12365 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12366 GEN_INT (size == 4 ? 2 : 3));
12367 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12368 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12370 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12371 countreg, destexp, srcexp));
12372 offset = count & ~(size - 1);
12375 if (size == 8 && (count & 0x04))
12377 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12379 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12381 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12386 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12388 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12390 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12395 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12397 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12399 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12402 /* The generic code based on the glibc implementation:
12403 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12404 allowing accelerated copying there)
12405 - copy the data using rep movsl
12406 - copy the rest. */
12411 rtx srcmem, dstmem;
12412 int desired_alignment = (TARGET_PENTIUMPRO
12413 && (count == 0 || count >= (unsigned int) 260)
12414 ? 8 : UNITS_PER_WORD);
12415 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12416 dst = change_address (dst, BLKmode, destreg);
12417 src = change_address (src, BLKmode, srcreg);
12419 /* In case we don't know anything about the alignment, default to
12420 library version, since it is usually equally fast and result in
12423 Also emit call when we know that the count is large and call overhead
12424 will not be important. */
12425 if (!TARGET_INLINE_ALL_STRINGOPS
12426 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12429 if (TARGET_SINGLE_STRINGOP)
12430 emit_insn (gen_cld ());
12432 countreg2 = gen_reg_rtx (Pmode);
12433 countreg = copy_to_mode_reg (counter_mode, count_exp);
12435 /* We don't use loops to align destination and to copy parts smaller
12436 than 4 bytes, because gcc is able to optimize such code better (in
12437 the case the destination or the count really is aligned, gcc is often
12438 able to predict the branches) and also it is friendlier to the
12439 hardware branch prediction.
12441 Using loops is beneficial for generic case, because we can
12442 handle small counts using the loops. Many CPUs (such as Athlon)
12443 have large REP prefix setup costs.
12445 This is quite costly. Maybe we can revisit this decision later or
12446 add some customizability to this code. */
12448 if (count == 0 && align < desired_alignment)
12450 label = gen_label_rtx ();
12451 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12452 LEU, 0, counter_mode, 1, label);
12456 rtx label = ix86_expand_aligntest (destreg, 1);
12457 srcmem = change_address (src, QImode, srcreg);
12458 dstmem = change_address (dst, QImode, destreg);
12459 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12460 ix86_adjust_counter (countreg, 1);
12461 emit_label (label);
12462 LABEL_NUSES (label) = 1;
12466 rtx label = ix86_expand_aligntest (destreg, 2);
12467 srcmem = change_address (src, HImode, srcreg);
12468 dstmem = change_address (dst, HImode, destreg);
12469 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12470 ix86_adjust_counter (countreg, 2);
12471 emit_label (label);
12472 LABEL_NUSES (label) = 1;
12474 if (align <= 4 && desired_alignment > 4)
12476 rtx label = ix86_expand_aligntest (destreg, 4);
12477 srcmem = change_address (src, SImode, srcreg);
12478 dstmem = change_address (dst, SImode, destreg);
12479 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12480 ix86_adjust_counter (countreg, 4);
12481 emit_label (label);
12482 LABEL_NUSES (label) = 1;
12485 if (label && desired_alignment > 4 && !TARGET_64BIT)
12487 emit_label (label);
12488 LABEL_NUSES (label) = 1;
12491 if (!TARGET_SINGLE_STRINGOP)
12492 emit_insn (gen_cld ());
12495 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12497 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12501 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12502 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12504 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12505 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12506 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12507 countreg2, destexp, srcexp));
12511 emit_label (label);
12512 LABEL_NUSES (label) = 1;
12514 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12516 srcmem = change_address (src, SImode, srcreg);
12517 dstmem = change_address (dst, SImode, destreg);
12518 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12520 if ((align <= 4 || count == 0) && TARGET_64BIT)
12522 rtx label = ix86_expand_aligntest (countreg, 4);
12523 srcmem = change_address (src, SImode, srcreg);
12524 dstmem = change_address (dst, SImode, destreg);
12525 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12526 emit_label (label);
12527 LABEL_NUSES (label) = 1;
12529 if (align > 2 && count != 0 && (count & 2))
12531 srcmem = change_address (src, HImode, srcreg);
12532 dstmem = change_address (dst, HImode, destreg);
12533 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12535 if (align <= 2 || count == 0)
12537 rtx label = ix86_expand_aligntest (countreg, 2);
12538 srcmem = change_address (src, HImode, srcreg);
12539 dstmem = change_address (dst, HImode, destreg);
12540 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12541 emit_label (label);
12542 LABEL_NUSES (label) = 1;
12544 if (align > 1 && count != 0 && (count & 1))
12546 srcmem = change_address (src, QImode, srcreg);
12547 dstmem = change_address (dst, QImode, destreg);
12548 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12550 if (align <= 1 || count == 0)
12552 rtx label = ix86_expand_aligntest (countreg, 1);
12553 srcmem = change_address (src, QImode, srcreg);
12554 dstmem = change_address (dst, QImode, destreg);
12555 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12556 emit_label (label);
12557 LABEL_NUSES (label) = 1;
12564 /* Expand string clear operation (bzero). Use i386 string operations when
12565 profitable. expand_movmem contains similar code. */
12567 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12569 rtx destreg, zeroreg, countreg, destexp;
12570 enum machine_mode counter_mode;
12571 HOST_WIDE_INT align = 0;
12572 unsigned HOST_WIDE_INT count = 0;
12574 if (GET_CODE (align_exp) == CONST_INT)
12575 align = INTVAL (align_exp);
12577 /* Can't use any of this if the user has appropriated esi. */
12578 if (global_regs[4])
12581 /* This simple hack avoids all inlining code and simplifies code below. */
12582 if (!TARGET_ALIGN_STRINGOPS)
12585 if (GET_CODE (count_exp) == CONST_INT)
12587 count = INTVAL (count_exp);
12588 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12591 /* Figure out proper mode for counter. For 32bits it is always SImode,
12592 for 64bits use SImode when possible, otherwise DImode.
12593 Set count to number of bytes copied when known at compile time. */
12595 || GET_MODE (count_exp) == SImode
12596 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12597 counter_mode = SImode;
12599 counter_mode = DImode;
12601 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12602 if (destreg != XEXP (dst, 0))
12603 dst = replace_equiv_address_nv (dst, destreg);
12606 /* When optimizing for size emit simple rep ; movsb instruction for
12607 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12608 sequence is 7 bytes long, so if optimizing for size and count is
12609 small enough that some stosl, stosw and stosb instructions without
12610 rep are shorter, fall back into the next if. */
12612 if ((!optimize || optimize_size)
12615 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12617 emit_insn (gen_cld ());
12619 countreg = ix86_zero_extend_to_Pmode (count_exp);
12620 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12621 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12622 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12624 else if (count != 0
12626 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12627 || optimize_size || count < (unsigned int) 64))
12629 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12630 unsigned HOST_WIDE_INT offset = 0;
12632 emit_insn (gen_cld ());
12634 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12635 if (count & ~(size - 1))
12637 unsigned HOST_WIDE_INT repcount;
12638 unsigned int max_nonrep;
12640 repcount = count >> (size == 4 ? 2 : 3);
12642 repcount &= 0x3fffffff;
12644 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12645 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12646 bytes. In both cases the latter seems to be faster for small
12648 max_nonrep = size == 4 ? 7 : 4;
12649 if (!optimize_size)
12652 case PROCESSOR_PENTIUM4:
12653 case PROCESSOR_NOCONA:
12660 if (repcount <= max_nonrep)
12661 while (repcount-- > 0)
12663 rtx mem = adjust_automodify_address_nv (dst,
12664 GET_MODE (zeroreg),
12666 emit_insn (gen_strset (destreg, mem, zeroreg));
12671 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12672 countreg = ix86_zero_extend_to_Pmode (countreg);
12673 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12674 GEN_INT (size == 4 ? 2 : 3));
12675 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12676 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12678 offset = count & ~(size - 1);
12681 if (size == 8 && (count & 0x04))
12683 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12685 emit_insn (gen_strset (destreg, mem,
12686 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12691 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12693 emit_insn (gen_strset (destreg, mem,
12694 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12699 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12701 emit_insn (gen_strset (destreg, mem,
12702 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12709 /* Compute desired alignment of the string operation. */
12710 int desired_alignment = (TARGET_PENTIUMPRO
12711 && (count == 0 || count >= (unsigned int) 260)
12712 ? 8 : UNITS_PER_WORD);
12714 /* In case we don't know anything about the alignment, default to
12715 library version, since it is usually equally fast and result in
12718 Also emit call when we know that the count is large and call overhead
12719 will not be important. */
12720 if (!TARGET_INLINE_ALL_STRINGOPS
12721 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12724 if (TARGET_SINGLE_STRINGOP)
12725 emit_insn (gen_cld ());
12727 countreg2 = gen_reg_rtx (Pmode);
12728 countreg = copy_to_mode_reg (counter_mode, count_exp);
12729 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12730 /* Get rid of MEM_OFFSET, it won't be accurate. */
12731 dst = change_address (dst, BLKmode, destreg);
12733 if (count == 0 && align < desired_alignment)
12735 label = gen_label_rtx ();
12736 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12737 LEU, 0, counter_mode, 1, label);
12741 rtx label = ix86_expand_aligntest (destreg, 1);
12742 emit_insn (gen_strset (destreg, dst,
12743 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12744 ix86_adjust_counter (countreg, 1);
12745 emit_label (label);
12746 LABEL_NUSES (label) = 1;
12750 rtx label = ix86_expand_aligntest (destreg, 2);
12751 emit_insn (gen_strset (destreg, dst,
12752 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12753 ix86_adjust_counter (countreg, 2);
12754 emit_label (label);
12755 LABEL_NUSES (label) = 1;
12757 if (align <= 4 && desired_alignment > 4)
12759 rtx label = ix86_expand_aligntest (destreg, 4);
12760 emit_insn (gen_strset (destreg, dst,
12762 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
12764 ix86_adjust_counter (countreg, 4);
12765 emit_label (label);
12766 LABEL_NUSES (label) = 1;
12769 if (label && desired_alignment > 4 && !TARGET_64BIT)
12771 emit_label (label);
12772 LABEL_NUSES (label) = 1;
12776 if (!TARGET_SINGLE_STRINGOP)
12777 emit_insn (gen_cld ());
12780 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12782 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12786 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12787 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12789 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12790 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
12794 emit_label (label);
12795 LABEL_NUSES (label) = 1;
12798 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12799 emit_insn (gen_strset (destreg, dst,
12800 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12801 if (TARGET_64BIT && (align <= 4 || count == 0))
12803 rtx label = ix86_expand_aligntest (countreg, 4);
12804 emit_insn (gen_strset (destreg, dst,
12805 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12806 emit_label (label);
12807 LABEL_NUSES (label) = 1;
12809 if (align > 2 && count != 0 && (count & 2))
12810 emit_insn (gen_strset (destreg, dst,
12811 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12812 if (align <= 2 || count == 0)
12814 rtx label = ix86_expand_aligntest (countreg, 2);
12815 emit_insn (gen_strset (destreg, dst,
12816 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12817 emit_label (label);
12818 LABEL_NUSES (label) = 1;
12820 if (align > 1 && count != 0 && (count & 1))
12821 emit_insn (gen_strset (destreg, dst,
12822 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12823 if (align <= 1 || count == 0)
12825 rtx label = ix86_expand_aligntest (countreg, 1);
12826 emit_insn (gen_strset (destreg, dst,
12827 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12828 emit_label (label);
12829 LABEL_NUSES (label) = 1;
12835 /* Expand strlen. */
12837 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
12839 rtx addr, scratch1, scratch2, scratch3, scratch4;
12841 /* The generic case of strlen expander is long. Avoid it's
12842 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
12844 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12845 && !TARGET_INLINE_ALL_STRINGOPS
12847 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
12850 addr = force_reg (Pmode, XEXP (src, 0));
12851 scratch1 = gen_reg_rtx (Pmode);
12853 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12856 /* Well it seems that some optimizer does not combine a call like
12857 foo(strlen(bar), strlen(bar));
12858 when the move and the subtraction is done here. It does calculate
12859 the length just once when these instructions are done inside of
12860 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
12861 often used and I use one fewer register for the lifetime of
12862 output_strlen_unroll() this is better. */
12864 emit_move_insn (out, addr);
12866 ix86_expand_strlensi_unroll_1 (out, src, align);
12868 /* strlensi_unroll_1 returns the address of the zero at the end of
12869 the string, like memchr(), so compute the length by subtracting
12870 the start address. */
12872 emit_insn (gen_subdi3 (out, out, addr));
12874 emit_insn (gen_subsi3 (out, out, addr));
12879 scratch2 = gen_reg_rtx (Pmode);
12880 scratch3 = gen_reg_rtx (Pmode);
12881 scratch4 = force_reg (Pmode, constm1_rtx);
12883 emit_move_insn (scratch3, addr);
12884 eoschar = force_reg (QImode, eoschar);
12886 emit_insn (gen_cld ());
12887 src = replace_equiv_address_nv (src, scratch3);
12889 /* If .md starts supporting :P, this can be done in .md. */
12890 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
12891 scratch4), UNSPEC_SCAS);
12892 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
12895 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
12896 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
12900 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
12901 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
12907 /* Expand the appropriate insns for doing strlen if not just doing
12910 out = result, initialized with the start address
12911 align_rtx = alignment of the address.
12912 scratch = scratch register, initialized with the startaddress when
12913 not aligned, otherwise undefined
12915 This is just the body. It needs the initializations mentioned above and
12916 some address computing at the end. These things are done in i386.md. */
12919 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
12923 rtx align_2_label = NULL_RTX;
12924 rtx align_3_label = NULL_RTX;
12925 rtx align_4_label = gen_label_rtx ();
12926 rtx end_0_label = gen_label_rtx ();
12928 rtx tmpreg = gen_reg_rtx (SImode);
12929 rtx scratch = gen_reg_rtx (SImode);
12933 if (GET_CODE (align_rtx) == CONST_INT)
12934 align = INTVAL (align_rtx);
12936 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
12938 /* Is there a known alignment and is it less than 4? */
12941 rtx scratch1 = gen_reg_rtx (Pmode);
12942 emit_move_insn (scratch1, out);
12943 /* Is there a known alignment and is it not 2? */
12946 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
12947 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
12949 /* Leave just the 3 lower bits. */
12950 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
12951 NULL_RTX, 0, OPTAB_WIDEN);
12953 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12954 Pmode, 1, align_4_label);
12955 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
12956 Pmode, 1, align_2_label);
12957 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
12958 Pmode, 1, align_3_label);
12962 /* Since the alignment is 2, we have to check 2 or 0 bytes;
12963 check if is aligned to 4 - byte. */
12965 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
12966 NULL_RTX, 0, OPTAB_WIDEN);
12968 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12969 Pmode, 1, align_4_label);
12972 mem = change_address (src, QImode, out);
12974 /* Now compare the bytes. */
12976 /* Compare the first n unaligned byte on a byte per byte basis. */
12977 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
12978 QImode, 1, end_0_label);
12980 /* Increment the address. */
12982 emit_insn (gen_adddi3 (out, out, const1_rtx));
12984 emit_insn (gen_addsi3 (out, out, const1_rtx));
12986 /* Not needed with an alignment of 2 */
12989 emit_label (align_2_label);
12991 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12995 emit_insn (gen_adddi3 (out, out, const1_rtx));
12997 emit_insn (gen_addsi3 (out, out, const1_rtx));
12999 emit_label (align_3_label);
13002 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13006 emit_insn (gen_adddi3 (out, out, const1_rtx));
13008 emit_insn (gen_addsi3 (out, out, const1_rtx));
13011 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13012 align this loop. It gives only huge programs, but does not help to
13014 emit_label (align_4_label);
13016 mem = change_address (src, SImode, out);
13017 emit_move_insn (scratch, mem);
13019 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13021 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13023 /* This formula yields a nonzero result iff one of the bytes is zero.
13024 This saves three branches inside loop and many cycles. */
13026 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13027 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13028 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13029 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13030 gen_int_mode (0x80808080, SImode)));
13031 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13036 rtx reg = gen_reg_rtx (SImode);
13037 rtx reg2 = gen_reg_rtx (Pmode);
13038 emit_move_insn (reg, tmpreg);
13039 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13041 /* If zero is not in the first two bytes, move two bytes forward. */
13042 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13043 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13044 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13045 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13046 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13049 /* Emit lea manually to avoid clobbering of flags. */
13050 emit_insn (gen_rtx_SET (SImode, reg2,
13051 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13053 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13054 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13055 emit_insn (gen_rtx_SET (VOIDmode, out,
13056 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13063 rtx end_2_label = gen_label_rtx ();
13064 /* Is zero in the first two bytes? */
13066 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13067 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13068 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13069 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13070 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13072 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13073 JUMP_LABEL (tmp) = end_2_label;
13075 /* Not in the first two. Move two bytes forward. */
13076 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13078 emit_insn (gen_adddi3 (out, out, const2_rtx));
13080 emit_insn (gen_addsi3 (out, out, const2_rtx));
13082 emit_label (end_2_label);
13086 /* Avoid branch in fixing the byte. */
13087 tmpreg = gen_lowpart (QImode, tmpreg);
13088 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13089 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13091 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13093 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13095 emit_label (end_0_label);
13099 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13100 rtx callarg2 ATTRIBUTE_UNUSED,
13101 rtx pop, int sibcall)
13103 rtx use = NULL, call;
13105 if (pop == const0_rtx)
13107 gcc_assert (!TARGET_64BIT || !pop);
13110 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13111 fnaddr = machopic_indirect_call_target (fnaddr);
13113 /* Static functions and indirect calls don't need the pic register. */
13114 if (! TARGET_64BIT && flag_pic
13115 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13116 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13117 use_reg (&use, pic_offset_table_rtx);
13119 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13121 rtx al = gen_rtx_REG (QImode, 0);
13122 emit_move_insn (al, callarg2);
13123 use_reg (&use, al);
13125 #endif /* TARGET_MACHO */
13127 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13129 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13130 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13132 if (sibcall && TARGET_64BIT
13133 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13136 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13137 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13138 emit_move_insn (fnaddr, addr);
13139 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13142 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13144 call = gen_rtx_SET (VOIDmode, retval, call);
13147 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13148 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13149 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13152 call = emit_call_insn (call);
13154 CALL_INSN_FUNCTION_USAGE (call) = use;
13158 /* Clear stack slot assignments remembered from previous functions.
13159 This is called from INIT_EXPANDERS once before RTL is emitted for each
13162 static struct machine_function *
13163 ix86_init_machine_status (void)
13165 struct machine_function *f;
13167 f = ggc_alloc_cleared (sizeof (struct machine_function));
13168 f->use_fast_prologue_epilogue_nregs = -1;
13169 f->tls_descriptor_call_expanded_p = 0;
13174 /* Return a MEM corresponding to a stack slot with mode MODE.
13175 Allocate a new slot if necessary.
13177 The RTL for a function can have several slots available: N is
13178 which slot to use. */
13181 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13183 struct stack_local_entry *s;
13185 gcc_assert (n < MAX_386_STACK_LOCALS);
13187 for (s = ix86_stack_locals; s; s = s->next)
13188 if (s->mode == mode && s->n == n)
13191 s = (struct stack_local_entry *)
13192 ggc_alloc (sizeof (struct stack_local_entry));
13195 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13197 s->next = ix86_stack_locals;
13198 ix86_stack_locals = s;
13202 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13204 static GTY(()) rtx ix86_tls_symbol;
13206 ix86_tls_get_addr (void)
13209 if (!ix86_tls_symbol)
13211 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13212 (TARGET_ANY_GNU_TLS
13214 ? "___tls_get_addr"
13215 : "__tls_get_addr");
13218 return ix86_tls_symbol;
13221 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13223 static GTY(()) rtx ix86_tls_module_base_symbol;
13225 ix86_tls_module_base (void)
13228 if (!ix86_tls_module_base_symbol)
13230 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13231 "_TLS_MODULE_BASE_");
13232 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13233 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13236 return ix86_tls_module_base_symbol;
13239 /* Calculate the length of the memory address in the instruction
13240 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13243 memory_address_length (rtx addr)
13245 struct ix86_address parts;
13246 rtx base, index, disp;
13250 if (GET_CODE (addr) == PRE_DEC
13251 || GET_CODE (addr) == POST_INC
13252 || GET_CODE (addr) == PRE_MODIFY
13253 || GET_CODE (addr) == POST_MODIFY)
13256 ok = ix86_decompose_address (addr, &parts);
13259 if (parts.base && GET_CODE (parts.base) == SUBREG)
13260 parts.base = SUBREG_REG (parts.base);
13261 if (parts.index && GET_CODE (parts.index) == SUBREG)
13262 parts.index = SUBREG_REG (parts.index);
13265 index = parts.index;
13270 - esp as the base always wants an index,
13271 - ebp as the base always wants a displacement. */
13273 /* Register Indirect. */
13274 if (base && !index && !disp)
13276 /* esp (for its index) and ebp (for its displacement) need
13277 the two-byte modrm form. */
13278 if (addr == stack_pointer_rtx
13279 || addr == arg_pointer_rtx
13280 || addr == frame_pointer_rtx
13281 || addr == hard_frame_pointer_rtx)
13285 /* Direct Addressing. */
13286 else if (disp && !base && !index)
13291 /* Find the length of the displacement constant. */
13294 if (base && satisfies_constraint_K (disp))
13299 /* ebp always wants a displacement. */
13300 else if (base == hard_frame_pointer_rtx)
13303 /* An index requires the two-byte modrm form.... */
13305 /* ...like esp, which always wants an index. */
13306 || base == stack_pointer_rtx
13307 || base == arg_pointer_rtx
13308 || base == frame_pointer_rtx)
13315 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13316 is set, expect that insn have 8bit immediate alternative. */
13318 ix86_attr_length_immediate_default (rtx insn, int shortform)
13322 extract_insn_cached (insn);
13323 for (i = recog_data.n_operands - 1; i >= 0; --i)
13324 if (CONSTANT_P (recog_data.operand[i]))
13327 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13331 switch (get_attr_mode (insn))
13342 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13347 fatal_insn ("unknown insn mode", insn);
13353 /* Compute default value for "length_address" attribute. */
13355 ix86_attr_length_address_default (rtx insn)
13359 if (get_attr_type (insn) == TYPE_LEA)
13361 rtx set = PATTERN (insn);
13363 if (GET_CODE (set) == PARALLEL)
13364 set = XVECEXP (set, 0, 0);
13366 gcc_assert (GET_CODE (set) == SET);
13368 return memory_address_length (SET_SRC (set));
13371 extract_insn_cached (insn);
13372 for (i = recog_data.n_operands - 1; i >= 0; --i)
13373 if (GET_CODE (recog_data.operand[i]) == MEM)
13375 return memory_address_length (XEXP (recog_data.operand[i], 0));
13381 /* Return the maximum number of instructions a cpu can issue. */
13384 ix86_issue_rate (void)
13388 case PROCESSOR_PENTIUM:
13392 case PROCESSOR_PENTIUMPRO:
13393 case PROCESSOR_PENTIUM4:
13394 case PROCESSOR_ATHLON:
13396 case PROCESSOR_NOCONA:
13397 case PROCESSOR_GENERIC32:
13398 case PROCESSOR_GENERIC64:
13406 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13407 by DEP_INSN and nothing set by DEP_INSN. */
13410 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13414 /* Simplify the test for uninteresting insns. */
13415 if (insn_type != TYPE_SETCC
13416 && insn_type != TYPE_ICMOV
13417 && insn_type != TYPE_FCMOV
13418 && insn_type != TYPE_IBR)
13421 if ((set = single_set (dep_insn)) != 0)
13423 set = SET_DEST (set);
13426 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13427 && XVECLEN (PATTERN (dep_insn), 0) == 2
13428 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13429 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13431 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13432 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13437 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13440 /* This test is true if the dependent insn reads the flags but
13441 not any other potentially set register. */
13442 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13445 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13451 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13452 address with operands set by DEP_INSN. */
13455 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13459 if (insn_type == TYPE_LEA
13462 addr = PATTERN (insn);
13464 if (GET_CODE (addr) == PARALLEL)
13465 addr = XVECEXP (addr, 0, 0);
13467 gcc_assert (GET_CODE (addr) == SET);
13469 addr = SET_SRC (addr);
13474 extract_insn_cached (insn);
13475 for (i = recog_data.n_operands - 1; i >= 0; --i)
13476 if (GET_CODE (recog_data.operand[i]) == MEM)
13478 addr = XEXP (recog_data.operand[i], 0);
13485 return modified_in_p (addr, dep_insn);
13489 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13491 enum attr_type insn_type, dep_insn_type;
13492 enum attr_memory memory;
13494 int dep_insn_code_number;
13496 /* Anti and output dependencies have zero cost on all CPUs. */
13497 if (REG_NOTE_KIND (link) != 0)
13500 dep_insn_code_number = recog_memoized (dep_insn);
13502 /* If we can't recognize the insns, we can't really do anything. */
13503 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13506 insn_type = get_attr_type (insn);
13507 dep_insn_type = get_attr_type (dep_insn);
13511 case PROCESSOR_PENTIUM:
13512 /* Address Generation Interlock adds a cycle of latency. */
13513 if (ix86_agi_dependant (insn, dep_insn, insn_type))
13516 /* ??? Compares pair with jump/setcc. */
13517 if (ix86_flags_dependant (insn, dep_insn, insn_type))
13520 /* Floating point stores require value to be ready one cycle earlier. */
13521 if (insn_type == TYPE_FMOV
13522 && get_attr_memory (insn) == MEMORY_STORE
13523 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13527 case PROCESSOR_PENTIUMPRO:
13528 memory = get_attr_memory (insn);
13530 /* INT->FP conversion is expensive. */
13531 if (get_attr_fp_int_src (dep_insn))
13534 /* There is one cycle extra latency between an FP op and a store. */
13535 if (insn_type == TYPE_FMOV
13536 && (set = single_set (dep_insn)) != NULL_RTX
13537 && (set2 = single_set (insn)) != NULL_RTX
13538 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13539 && GET_CODE (SET_DEST (set2)) == MEM)
13542 /* Show ability of reorder buffer to hide latency of load by executing
13543 in parallel with previous instruction in case
13544 previous instruction is not needed to compute the address. */
13545 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13546 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13548 /* Claim moves to take one cycle, as core can issue one load
13549 at time and the next load can start cycle later. */
13550 if (dep_insn_type == TYPE_IMOV
13551 || dep_insn_type == TYPE_FMOV)
13559 memory = get_attr_memory (insn);
13561 /* The esp dependency is resolved before the instruction is really
13563 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13564 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13567 /* INT->FP conversion is expensive. */
13568 if (get_attr_fp_int_src (dep_insn))
13571 /* Show ability of reorder buffer to hide latency of load by executing
13572 in parallel with previous instruction in case
13573 previous instruction is not needed to compute the address. */
13574 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13575 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13577 /* Claim moves to take one cycle, as core can issue one load
13578 at time and the next load can start cycle later. */
13579 if (dep_insn_type == TYPE_IMOV
13580 || dep_insn_type == TYPE_FMOV)
13589 case PROCESSOR_ATHLON:
13591 case PROCESSOR_GENERIC32:
13592 case PROCESSOR_GENERIC64:
13593 memory = get_attr_memory (insn);
13595 /* Show ability of reorder buffer to hide latency of load by executing
13596 in parallel with previous instruction in case
13597 previous instruction is not needed to compute the address. */
13598 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13599 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13601 enum attr_unit unit = get_attr_unit (insn);
13604 /* Because of the difference between the length of integer and
13605 floating unit pipeline preparation stages, the memory operands
13606 for floating point are cheaper.
13608 ??? For Athlon it the difference is most probably 2. */
13609 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13612 loadcost = TARGET_ATHLON ? 2 : 0;
13614 if (cost >= loadcost)
13627 /* How many alternative schedules to try. This should be as wide as the
13628 scheduling freedom in the DFA, but no wider. Making this value too
13629 large results extra work for the scheduler. */
13632 ia32_multipass_dfa_lookahead (void)
13634 if (ix86_tune == PROCESSOR_PENTIUM)
13637 if (ix86_tune == PROCESSOR_PENTIUMPRO
13638 || ix86_tune == PROCESSOR_K6)
13646 /* Compute the alignment given to a constant that is being placed in memory.
13647 EXP is the constant and ALIGN is the alignment that the object would
13649 The value of this function is used instead of that alignment to align
13653 ix86_constant_alignment (tree exp, int align)
13655 if (TREE_CODE (exp) == REAL_CST)
13657 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13659 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13662 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13663 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13664 return BITS_PER_WORD;
13669 /* Compute the alignment for a static variable.
13670 TYPE is the data type, and ALIGN is the alignment that
13671 the object would ordinarily have. The value of this function is used
13672 instead of that alignment to align the object. */
13675 ix86_data_alignment (tree type, int align)
13677 int max_align = optimize_size ? BITS_PER_WORD : 256;
13679 if (AGGREGATE_TYPE_P (type)
13680 && TYPE_SIZE (type)
13681 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13682 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13683 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13684 && align < max_align)
13687 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13688 to 16byte boundary. */
13691 if (AGGREGATE_TYPE_P (type)
13692 && TYPE_SIZE (type)
13693 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13694 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13695 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13699 if (TREE_CODE (type) == ARRAY_TYPE)
13701 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13703 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13706 else if (TREE_CODE (type) == COMPLEX_TYPE)
13709 if (TYPE_MODE (type) == DCmode && align < 64)
13711 if (TYPE_MODE (type) == XCmode && align < 128)
13714 else if ((TREE_CODE (type) == RECORD_TYPE
13715 || TREE_CODE (type) == UNION_TYPE
13716 || TREE_CODE (type) == QUAL_UNION_TYPE)
13717 && TYPE_FIELDS (type))
13719 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13721 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13724 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13725 || TREE_CODE (type) == INTEGER_TYPE)
13727 if (TYPE_MODE (type) == DFmode && align < 64)
13729 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13736 /* Compute the alignment for a local variable.
13737 TYPE is the data type, and ALIGN is the alignment that
13738 the object would ordinarily have. The value of this macro is used
13739 instead of that alignment to align the object. */
13742 ix86_local_alignment (tree type, int align)
13744 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13745 to 16byte boundary. */
13748 if (AGGREGATE_TYPE_P (type)
13749 && TYPE_SIZE (type)
13750 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13751 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13752 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13755 if (TREE_CODE (type) == ARRAY_TYPE)
13757 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13759 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13762 else if (TREE_CODE (type) == COMPLEX_TYPE)
13764 if (TYPE_MODE (type) == DCmode && align < 64)
13766 if (TYPE_MODE (type) == XCmode && align < 128)
13769 else if ((TREE_CODE (type) == RECORD_TYPE
13770 || TREE_CODE (type) == UNION_TYPE
13771 || TREE_CODE (type) == QUAL_UNION_TYPE)
13772 && TYPE_FIELDS (type))
13774 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13776 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13779 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13780 || TREE_CODE (type) == INTEGER_TYPE)
13783 if (TYPE_MODE (type) == DFmode && align < 64)
13785 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13791 /* Emit RTL insns to initialize the variable parts of a trampoline.
13792 FNADDR is an RTX for the address of the function's pure code.
13793 CXT is an RTX for the static chain value for the function. */
13795 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
13799 /* Compute offset from the end of the jmp to the target function. */
13800 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
13801 plus_constant (tramp, 10),
13802 NULL_RTX, 1, OPTAB_DIRECT);
13803 emit_move_insn (gen_rtx_MEM (QImode, tramp),
13804 gen_int_mode (0xb9, QImode));
13805 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
13806 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
13807 gen_int_mode (0xe9, QImode));
13808 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
13813 /* Try to load address using shorter movl instead of movabs.
13814 We may want to support movq for kernel mode, but kernel does not use
13815 trampolines at the moment. */
13816 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
13818 fnaddr = copy_to_mode_reg (DImode, fnaddr);
13819 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13820 gen_int_mode (0xbb41, HImode));
13821 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
13822 gen_lowpart (SImode, fnaddr));
13827 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13828 gen_int_mode (0xbb49, HImode));
13829 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13833 /* Load static chain using movabs to r10. */
13834 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13835 gen_int_mode (0xba49, HImode));
13836 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13839 /* Jump to the r11 */
13840 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13841 gen_int_mode (0xff49, HImode));
13842 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
13843 gen_int_mode (0xe3, QImode));
13845 gcc_assert (offset <= TRAMPOLINE_SIZE);
13848 #ifdef ENABLE_EXECUTE_STACK
13849 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
13850 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
13854 /* Codes for all the SSE/MMX builtins. */
13857 IX86_BUILTIN_ADDPS,
13858 IX86_BUILTIN_ADDSS,
13859 IX86_BUILTIN_DIVPS,
13860 IX86_BUILTIN_DIVSS,
13861 IX86_BUILTIN_MULPS,
13862 IX86_BUILTIN_MULSS,
13863 IX86_BUILTIN_SUBPS,
13864 IX86_BUILTIN_SUBSS,
13866 IX86_BUILTIN_CMPEQPS,
13867 IX86_BUILTIN_CMPLTPS,
13868 IX86_BUILTIN_CMPLEPS,
13869 IX86_BUILTIN_CMPGTPS,
13870 IX86_BUILTIN_CMPGEPS,
13871 IX86_BUILTIN_CMPNEQPS,
13872 IX86_BUILTIN_CMPNLTPS,
13873 IX86_BUILTIN_CMPNLEPS,
13874 IX86_BUILTIN_CMPNGTPS,
13875 IX86_BUILTIN_CMPNGEPS,
13876 IX86_BUILTIN_CMPORDPS,
13877 IX86_BUILTIN_CMPUNORDPS,
13878 IX86_BUILTIN_CMPEQSS,
13879 IX86_BUILTIN_CMPLTSS,
13880 IX86_BUILTIN_CMPLESS,
13881 IX86_BUILTIN_CMPNEQSS,
13882 IX86_BUILTIN_CMPNLTSS,
13883 IX86_BUILTIN_CMPNLESS,
13884 IX86_BUILTIN_CMPNGTSS,
13885 IX86_BUILTIN_CMPNGESS,
13886 IX86_BUILTIN_CMPORDSS,
13887 IX86_BUILTIN_CMPUNORDSS,
13889 IX86_BUILTIN_COMIEQSS,
13890 IX86_BUILTIN_COMILTSS,
13891 IX86_BUILTIN_COMILESS,
13892 IX86_BUILTIN_COMIGTSS,
13893 IX86_BUILTIN_COMIGESS,
13894 IX86_BUILTIN_COMINEQSS,
13895 IX86_BUILTIN_UCOMIEQSS,
13896 IX86_BUILTIN_UCOMILTSS,
13897 IX86_BUILTIN_UCOMILESS,
13898 IX86_BUILTIN_UCOMIGTSS,
13899 IX86_BUILTIN_UCOMIGESS,
13900 IX86_BUILTIN_UCOMINEQSS,
13902 IX86_BUILTIN_CVTPI2PS,
13903 IX86_BUILTIN_CVTPS2PI,
13904 IX86_BUILTIN_CVTSI2SS,
13905 IX86_BUILTIN_CVTSI642SS,
13906 IX86_BUILTIN_CVTSS2SI,
13907 IX86_BUILTIN_CVTSS2SI64,
13908 IX86_BUILTIN_CVTTPS2PI,
13909 IX86_BUILTIN_CVTTSS2SI,
13910 IX86_BUILTIN_CVTTSS2SI64,
13912 IX86_BUILTIN_MAXPS,
13913 IX86_BUILTIN_MAXSS,
13914 IX86_BUILTIN_MINPS,
13915 IX86_BUILTIN_MINSS,
13917 IX86_BUILTIN_LOADUPS,
13918 IX86_BUILTIN_STOREUPS,
13919 IX86_BUILTIN_MOVSS,
13921 IX86_BUILTIN_MOVHLPS,
13922 IX86_BUILTIN_MOVLHPS,
13923 IX86_BUILTIN_LOADHPS,
13924 IX86_BUILTIN_LOADLPS,
13925 IX86_BUILTIN_STOREHPS,
13926 IX86_BUILTIN_STORELPS,
13928 IX86_BUILTIN_MASKMOVQ,
13929 IX86_BUILTIN_MOVMSKPS,
13930 IX86_BUILTIN_PMOVMSKB,
13932 IX86_BUILTIN_MOVNTPS,
13933 IX86_BUILTIN_MOVNTQ,
13935 IX86_BUILTIN_LOADDQU,
13936 IX86_BUILTIN_STOREDQU,
13938 IX86_BUILTIN_PACKSSWB,
13939 IX86_BUILTIN_PACKSSDW,
13940 IX86_BUILTIN_PACKUSWB,
13942 IX86_BUILTIN_PADDB,
13943 IX86_BUILTIN_PADDW,
13944 IX86_BUILTIN_PADDD,
13945 IX86_BUILTIN_PADDQ,
13946 IX86_BUILTIN_PADDSB,
13947 IX86_BUILTIN_PADDSW,
13948 IX86_BUILTIN_PADDUSB,
13949 IX86_BUILTIN_PADDUSW,
13950 IX86_BUILTIN_PSUBB,
13951 IX86_BUILTIN_PSUBW,
13952 IX86_BUILTIN_PSUBD,
13953 IX86_BUILTIN_PSUBQ,
13954 IX86_BUILTIN_PSUBSB,
13955 IX86_BUILTIN_PSUBSW,
13956 IX86_BUILTIN_PSUBUSB,
13957 IX86_BUILTIN_PSUBUSW,
13960 IX86_BUILTIN_PANDN,
13964 IX86_BUILTIN_PAVGB,
13965 IX86_BUILTIN_PAVGW,
13967 IX86_BUILTIN_PCMPEQB,
13968 IX86_BUILTIN_PCMPEQW,
13969 IX86_BUILTIN_PCMPEQD,
13970 IX86_BUILTIN_PCMPGTB,
13971 IX86_BUILTIN_PCMPGTW,
13972 IX86_BUILTIN_PCMPGTD,
13974 IX86_BUILTIN_PMADDWD,
13976 IX86_BUILTIN_PMAXSW,
13977 IX86_BUILTIN_PMAXUB,
13978 IX86_BUILTIN_PMINSW,
13979 IX86_BUILTIN_PMINUB,
13981 IX86_BUILTIN_PMULHUW,
13982 IX86_BUILTIN_PMULHW,
13983 IX86_BUILTIN_PMULLW,
13985 IX86_BUILTIN_PSADBW,
13986 IX86_BUILTIN_PSHUFW,
13988 IX86_BUILTIN_PSLLW,
13989 IX86_BUILTIN_PSLLD,
13990 IX86_BUILTIN_PSLLQ,
13991 IX86_BUILTIN_PSRAW,
13992 IX86_BUILTIN_PSRAD,
13993 IX86_BUILTIN_PSRLW,
13994 IX86_BUILTIN_PSRLD,
13995 IX86_BUILTIN_PSRLQ,
13996 IX86_BUILTIN_PSLLWI,
13997 IX86_BUILTIN_PSLLDI,
13998 IX86_BUILTIN_PSLLQI,
13999 IX86_BUILTIN_PSRAWI,
14000 IX86_BUILTIN_PSRADI,
14001 IX86_BUILTIN_PSRLWI,
14002 IX86_BUILTIN_PSRLDI,
14003 IX86_BUILTIN_PSRLQI,
14005 IX86_BUILTIN_PUNPCKHBW,
14006 IX86_BUILTIN_PUNPCKHWD,
14007 IX86_BUILTIN_PUNPCKHDQ,
14008 IX86_BUILTIN_PUNPCKLBW,
14009 IX86_BUILTIN_PUNPCKLWD,
14010 IX86_BUILTIN_PUNPCKLDQ,
14012 IX86_BUILTIN_SHUFPS,
14014 IX86_BUILTIN_RCPPS,
14015 IX86_BUILTIN_RCPSS,
14016 IX86_BUILTIN_RSQRTPS,
14017 IX86_BUILTIN_RSQRTSS,
14018 IX86_BUILTIN_SQRTPS,
14019 IX86_BUILTIN_SQRTSS,
14021 IX86_BUILTIN_UNPCKHPS,
14022 IX86_BUILTIN_UNPCKLPS,
14024 IX86_BUILTIN_ANDPS,
14025 IX86_BUILTIN_ANDNPS,
14027 IX86_BUILTIN_XORPS,
14030 IX86_BUILTIN_LDMXCSR,
14031 IX86_BUILTIN_STMXCSR,
14032 IX86_BUILTIN_SFENCE,
14034 /* 3DNow! Original */
14035 IX86_BUILTIN_FEMMS,
14036 IX86_BUILTIN_PAVGUSB,
14037 IX86_BUILTIN_PF2ID,
14038 IX86_BUILTIN_PFACC,
14039 IX86_BUILTIN_PFADD,
14040 IX86_BUILTIN_PFCMPEQ,
14041 IX86_BUILTIN_PFCMPGE,
14042 IX86_BUILTIN_PFCMPGT,
14043 IX86_BUILTIN_PFMAX,
14044 IX86_BUILTIN_PFMIN,
14045 IX86_BUILTIN_PFMUL,
14046 IX86_BUILTIN_PFRCP,
14047 IX86_BUILTIN_PFRCPIT1,
14048 IX86_BUILTIN_PFRCPIT2,
14049 IX86_BUILTIN_PFRSQIT1,
14050 IX86_BUILTIN_PFRSQRT,
14051 IX86_BUILTIN_PFSUB,
14052 IX86_BUILTIN_PFSUBR,
14053 IX86_BUILTIN_PI2FD,
14054 IX86_BUILTIN_PMULHRW,
14056 /* 3DNow! Athlon Extensions */
14057 IX86_BUILTIN_PF2IW,
14058 IX86_BUILTIN_PFNACC,
14059 IX86_BUILTIN_PFPNACC,
14060 IX86_BUILTIN_PI2FW,
14061 IX86_BUILTIN_PSWAPDSI,
14062 IX86_BUILTIN_PSWAPDSF,
14065 IX86_BUILTIN_ADDPD,
14066 IX86_BUILTIN_ADDSD,
14067 IX86_BUILTIN_DIVPD,
14068 IX86_BUILTIN_DIVSD,
14069 IX86_BUILTIN_MULPD,
14070 IX86_BUILTIN_MULSD,
14071 IX86_BUILTIN_SUBPD,
14072 IX86_BUILTIN_SUBSD,
14074 IX86_BUILTIN_CMPEQPD,
14075 IX86_BUILTIN_CMPLTPD,
14076 IX86_BUILTIN_CMPLEPD,
14077 IX86_BUILTIN_CMPGTPD,
14078 IX86_BUILTIN_CMPGEPD,
14079 IX86_BUILTIN_CMPNEQPD,
14080 IX86_BUILTIN_CMPNLTPD,
14081 IX86_BUILTIN_CMPNLEPD,
14082 IX86_BUILTIN_CMPNGTPD,
14083 IX86_BUILTIN_CMPNGEPD,
14084 IX86_BUILTIN_CMPORDPD,
14085 IX86_BUILTIN_CMPUNORDPD,
14086 IX86_BUILTIN_CMPNEPD,
14087 IX86_BUILTIN_CMPEQSD,
14088 IX86_BUILTIN_CMPLTSD,
14089 IX86_BUILTIN_CMPLESD,
14090 IX86_BUILTIN_CMPNEQSD,
14091 IX86_BUILTIN_CMPNLTSD,
14092 IX86_BUILTIN_CMPNLESD,
14093 IX86_BUILTIN_CMPORDSD,
14094 IX86_BUILTIN_CMPUNORDSD,
14095 IX86_BUILTIN_CMPNESD,
14097 IX86_BUILTIN_COMIEQSD,
14098 IX86_BUILTIN_COMILTSD,
14099 IX86_BUILTIN_COMILESD,
14100 IX86_BUILTIN_COMIGTSD,
14101 IX86_BUILTIN_COMIGESD,
14102 IX86_BUILTIN_COMINEQSD,
14103 IX86_BUILTIN_UCOMIEQSD,
14104 IX86_BUILTIN_UCOMILTSD,
14105 IX86_BUILTIN_UCOMILESD,
14106 IX86_BUILTIN_UCOMIGTSD,
14107 IX86_BUILTIN_UCOMIGESD,
14108 IX86_BUILTIN_UCOMINEQSD,
14110 IX86_BUILTIN_MAXPD,
14111 IX86_BUILTIN_MAXSD,
14112 IX86_BUILTIN_MINPD,
14113 IX86_BUILTIN_MINSD,
14115 IX86_BUILTIN_ANDPD,
14116 IX86_BUILTIN_ANDNPD,
14118 IX86_BUILTIN_XORPD,
14120 IX86_BUILTIN_SQRTPD,
14121 IX86_BUILTIN_SQRTSD,
14123 IX86_BUILTIN_UNPCKHPD,
14124 IX86_BUILTIN_UNPCKLPD,
14126 IX86_BUILTIN_SHUFPD,
14128 IX86_BUILTIN_LOADUPD,
14129 IX86_BUILTIN_STOREUPD,
14130 IX86_BUILTIN_MOVSD,
14132 IX86_BUILTIN_LOADHPD,
14133 IX86_BUILTIN_LOADLPD,
14135 IX86_BUILTIN_CVTDQ2PD,
14136 IX86_BUILTIN_CVTDQ2PS,
14138 IX86_BUILTIN_CVTPD2DQ,
14139 IX86_BUILTIN_CVTPD2PI,
14140 IX86_BUILTIN_CVTPD2PS,
14141 IX86_BUILTIN_CVTTPD2DQ,
14142 IX86_BUILTIN_CVTTPD2PI,
14144 IX86_BUILTIN_CVTPI2PD,
14145 IX86_BUILTIN_CVTSI2SD,
14146 IX86_BUILTIN_CVTSI642SD,
14148 IX86_BUILTIN_CVTSD2SI,
14149 IX86_BUILTIN_CVTSD2SI64,
14150 IX86_BUILTIN_CVTSD2SS,
14151 IX86_BUILTIN_CVTSS2SD,
14152 IX86_BUILTIN_CVTTSD2SI,
14153 IX86_BUILTIN_CVTTSD2SI64,
14155 IX86_BUILTIN_CVTPS2DQ,
14156 IX86_BUILTIN_CVTPS2PD,
14157 IX86_BUILTIN_CVTTPS2DQ,
14159 IX86_BUILTIN_MOVNTI,
14160 IX86_BUILTIN_MOVNTPD,
14161 IX86_BUILTIN_MOVNTDQ,
14164 IX86_BUILTIN_MASKMOVDQU,
14165 IX86_BUILTIN_MOVMSKPD,
14166 IX86_BUILTIN_PMOVMSKB128,
14168 IX86_BUILTIN_PACKSSWB128,
14169 IX86_BUILTIN_PACKSSDW128,
14170 IX86_BUILTIN_PACKUSWB128,
14172 IX86_BUILTIN_PADDB128,
14173 IX86_BUILTIN_PADDW128,
14174 IX86_BUILTIN_PADDD128,
14175 IX86_BUILTIN_PADDQ128,
14176 IX86_BUILTIN_PADDSB128,
14177 IX86_BUILTIN_PADDSW128,
14178 IX86_BUILTIN_PADDUSB128,
14179 IX86_BUILTIN_PADDUSW128,
14180 IX86_BUILTIN_PSUBB128,
14181 IX86_BUILTIN_PSUBW128,
14182 IX86_BUILTIN_PSUBD128,
14183 IX86_BUILTIN_PSUBQ128,
14184 IX86_BUILTIN_PSUBSB128,
14185 IX86_BUILTIN_PSUBSW128,
14186 IX86_BUILTIN_PSUBUSB128,
14187 IX86_BUILTIN_PSUBUSW128,
14189 IX86_BUILTIN_PAND128,
14190 IX86_BUILTIN_PANDN128,
14191 IX86_BUILTIN_POR128,
14192 IX86_BUILTIN_PXOR128,
14194 IX86_BUILTIN_PAVGB128,
14195 IX86_BUILTIN_PAVGW128,
14197 IX86_BUILTIN_PCMPEQB128,
14198 IX86_BUILTIN_PCMPEQW128,
14199 IX86_BUILTIN_PCMPEQD128,
14200 IX86_BUILTIN_PCMPGTB128,
14201 IX86_BUILTIN_PCMPGTW128,
14202 IX86_BUILTIN_PCMPGTD128,
14204 IX86_BUILTIN_PMADDWD128,
14206 IX86_BUILTIN_PMAXSW128,
14207 IX86_BUILTIN_PMAXUB128,
14208 IX86_BUILTIN_PMINSW128,
14209 IX86_BUILTIN_PMINUB128,
14211 IX86_BUILTIN_PMULUDQ,
14212 IX86_BUILTIN_PMULUDQ128,
14213 IX86_BUILTIN_PMULHUW128,
14214 IX86_BUILTIN_PMULHW128,
14215 IX86_BUILTIN_PMULLW128,
14217 IX86_BUILTIN_PSADBW128,
14218 IX86_BUILTIN_PSHUFHW,
14219 IX86_BUILTIN_PSHUFLW,
14220 IX86_BUILTIN_PSHUFD,
14222 IX86_BUILTIN_PSLLW128,
14223 IX86_BUILTIN_PSLLD128,
14224 IX86_BUILTIN_PSLLQ128,
14225 IX86_BUILTIN_PSRAW128,
14226 IX86_BUILTIN_PSRAD128,
14227 IX86_BUILTIN_PSRLW128,
14228 IX86_BUILTIN_PSRLD128,
14229 IX86_BUILTIN_PSRLQ128,
14230 IX86_BUILTIN_PSLLDQI128,
14231 IX86_BUILTIN_PSLLWI128,
14232 IX86_BUILTIN_PSLLDI128,
14233 IX86_BUILTIN_PSLLQI128,
14234 IX86_BUILTIN_PSRAWI128,
14235 IX86_BUILTIN_PSRADI128,
14236 IX86_BUILTIN_PSRLDQI128,
14237 IX86_BUILTIN_PSRLWI128,
14238 IX86_BUILTIN_PSRLDI128,
14239 IX86_BUILTIN_PSRLQI128,
14241 IX86_BUILTIN_PUNPCKHBW128,
14242 IX86_BUILTIN_PUNPCKHWD128,
14243 IX86_BUILTIN_PUNPCKHDQ128,
14244 IX86_BUILTIN_PUNPCKHQDQ128,
14245 IX86_BUILTIN_PUNPCKLBW128,
14246 IX86_BUILTIN_PUNPCKLWD128,
14247 IX86_BUILTIN_PUNPCKLDQ128,
14248 IX86_BUILTIN_PUNPCKLQDQ128,
14250 IX86_BUILTIN_CLFLUSH,
14251 IX86_BUILTIN_MFENCE,
14252 IX86_BUILTIN_LFENCE,
14254 /* Prescott New Instructions. */
14255 IX86_BUILTIN_ADDSUBPS,
14256 IX86_BUILTIN_HADDPS,
14257 IX86_BUILTIN_HSUBPS,
14258 IX86_BUILTIN_MOVSHDUP,
14259 IX86_BUILTIN_MOVSLDUP,
14260 IX86_BUILTIN_ADDSUBPD,
14261 IX86_BUILTIN_HADDPD,
14262 IX86_BUILTIN_HSUBPD,
14263 IX86_BUILTIN_LDDQU,
14265 IX86_BUILTIN_MONITOR,
14266 IX86_BUILTIN_MWAIT,
14268 IX86_BUILTIN_VEC_INIT_V2SI,
14269 IX86_BUILTIN_VEC_INIT_V4HI,
14270 IX86_BUILTIN_VEC_INIT_V8QI,
14271 IX86_BUILTIN_VEC_EXT_V2DF,
14272 IX86_BUILTIN_VEC_EXT_V2DI,
14273 IX86_BUILTIN_VEC_EXT_V4SF,
14274 IX86_BUILTIN_VEC_EXT_V4SI,
14275 IX86_BUILTIN_VEC_EXT_V8HI,
14276 IX86_BUILTIN_VEC_EXT_V2SI,
14277 IX86_BUILTIN_VEC_EXT_V4HI,
14278 IX86_BUILTIN_VEC_SET_V8HI,
14279 IX86_BUILTIN_VEC_SET_V4HI,
14281 /* SSE2 ABI functions. */
14282 IX86_BUILTIN_SSE2_ACOS,
14283 IX86_BUILTIN_SSE2_ACOSF,
14284 IX86_BUILTIN_SSE2_ASIN,
14285 IX86_BUILTIN_SSE2_ASINF,
14286 IX86_BUILTIN_SSE2_ATAN,
14287 IX86_BUILTIN_SSE2_ATANF,
14288 IX86_BUILTIN_SSE2_ATAN2,
14289 IX86_BUILTIN_SSE2_ATAN2F,
14290 IX86_BUILTIN_SSE2_COS,
14291 IX86_BUILTIN_SSE2_COSF,
14292 IX86_BUILTIN_SSE2_EXP,
14293 IX86_BUILTIN_SSE2_EXPF,
14294 IX86_BUILTIN_SSE2_LOG10,
14295 IX86_BUILTIN_SSE2_LOG10F,
14296 IX86_BUILTIN_SSE2_LOG,
14297 IX86_BUILTIN_SSE2_LOGF,
14298 IX86_BUILTIN_SSE2_SIN,
14299 IX86_BUILTIN_SSE2_SINF,
14300 IX86_BUILTIN_SSE2_TAN,
14301 IX86_BUILTIN_SSE2_TANF,
14306 #define def_builtin(MASK, NAME, TYPE, CODE) \
14308 if ((MASK) & target_flags \
14309 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14310 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14311 NULL, NULL_TREE); \
14314 /* Bits for builtin_description.flag. */
14316 /* Set when we don't support the comparison natively, and should
14317 swap_comparison in order to support it. */
14318 #define BUILTIN_DESC_SWAP_OPERANDS 1
14320 struct builtin_description
14322 const unsigned int mask;
14323 const enum insn_code icode;
14324 const char *const name;
14325 const enum ix86_builtins code;
14326 const enum rtx_code comparison;
14327 const unsigned int flag;
14330 static const struct builtin_description bdesc_comi[] =
14332 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14333 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14334 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14335 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14336 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14337 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14338 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14339 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14340 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14341 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14342 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14343 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14344 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14345 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14346 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14347 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14348 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14349 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14350 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14351 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14352 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14353 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14354 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14355 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14358 static const struct builtin_description bdesc_2arg[] =
14361 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14362 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14363 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14364 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14365 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14366 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14367 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14368 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14370 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14371 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14372 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14373 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14374 BUILTIN_DESC_SWAP_OPERANDS },
14375 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14376 BUILTIN_DESC_SWAP_OPERANDS },
14377 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14378 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14379 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14380 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14381 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14382 BUILTIN_DESC_SWAP_OPERANDS },
14383 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14384 BUILTIN_DESC_SWAP_OPERANDS },
14385 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14386 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14387 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14388 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14389 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14390 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14391 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14392 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14393 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14394 BUILTIN_DESC_SWAP_OPERANDS },
14395 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14396 BUILTIN_DESC_SWAP_OPERANDS },
14397 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14399 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14400 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14401 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14402 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14404 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14405 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14406 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14407 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14409 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14410 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14411 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14412 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14413 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14416 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14417 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14418 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14419 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14420 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14421 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14422 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14423 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14425 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14426 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14427 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14428 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14429 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14430 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14431 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14432 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14434 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14435 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14436 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14438 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14439 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14440 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14441 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14443 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14444 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14446 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14447 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14448 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14449 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14450 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14451 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14453 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14454 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14455 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14456 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14458 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14459 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14460 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14461 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14462 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14463 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14466 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14467 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14468 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14470 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14471 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14472 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14474 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14475 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14476 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14477 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14478 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14479 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14481 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14482 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14483 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14484 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14485 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14486 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14488 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14489 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14490 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14491 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14493 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14494 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14497 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14498 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14499 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14500 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14501 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14502 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14503 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14504 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14506 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14507 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14508 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14509 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14510 BUILTIN_DESC_SWAP_OPERANDS },
14511 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14512 BUILTIN_DESC_SWAP_OPERANDS },
14513 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14514 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14515 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14516 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14517 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14518 BUILTIN_DESC_SWAP_OPERANDS },
14519 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14520 BUILTIN_DESC_SWAP_OPERANDS },
14521 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14522 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14523 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14524 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14525 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14526 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14527 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14528 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14529 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14531 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14532 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14533 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14534 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14536 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14537 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14538 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14539 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14541 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14542 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14543 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14546 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14547 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14548 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14549 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14550 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14551 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14552 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14553 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14555 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14556 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14557 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14558 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14559 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14560 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14561 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14562 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14564 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14565 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14567 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14568 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14569 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14570 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14572 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14573 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14575 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14576 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14577 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14578 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14579 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14580 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14582 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14583 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14584 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14585 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14587 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14588 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14589 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14590 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14591 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14592 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14593 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14594 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14596 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14597 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14598 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14600 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14601 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14603 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14604 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14606 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14607 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14608 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14610 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14611 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14612 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14614 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14615 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14617 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14619 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14620 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14621 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14622 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14625 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14626 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14627 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14628 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14629 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14630 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14633 static const struct builtin_description bdesc_1arg[] =
14635 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14636 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14638 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14639 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14640 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14642 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14643 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14644 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14645 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14646 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14647 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14649 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14650 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14652 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14654 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14655 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14657 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14658 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14659 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14660 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14661 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14663 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14665 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14666 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14667 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14668 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14670 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14671 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14672 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14675 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14676 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14680 ix86_init_builtins (void)
14683 ix86_init_mmx_sse_builtins ();
14685 ix86_init_sse_abi_builtins ();
14688 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14689 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
14692 ix86_init_mmx_sse_builtins (void)
14694 const struct builtin_description * d;
14697 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14698 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14699 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14700 tree V2DI_type_node
14701 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14702 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14703 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14704 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14705 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14706 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14707 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14709 tree pchar_type_node = build_pointer_type (char_type_node);
14710 tree pcchar_type_node = build_pointer_type (
14711 build_type_variant (char_type_node, 1, 0));
14712 tree pfloat_type_node = build_pointer_type (float_type_node);
14713 tree pcfloat_type_node = build_pointer_type (
14714 build_type_variant (float_type_node, 1, 0));
14715 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14716 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14717 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14720 tree int_ftype_v4sf_v4sf
14721 = build_function_type_list (integer_type_node,
14722 V4SF_type_node, V4SF_type_node, NULL_TREE);
14723 tree v4si_ftype_v4sf_v4sf
14724 = build_function_type_list (V4SI_type_node,
14725 V4SF_type_node, V4SF_type_node, NULL_TREE);
14726 /* MMX/SSE/integer conversions. */
14727 tree int_ftype_v4sf
14728 = build_function_type_list (integer_type_node,
14729 V4SF_type_node, NULL_TREE);
14730 tree int64_ftype_v4sf
14731 = build_function_type_list (long_long_integer_type_node,
14732 V4SF_type_node, NULL_TREE);
14733 tree int_ftype_v8qi
14734 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14735 tree v4sf_ftype_v4sf_int
14736 = build_function_type_list (V4SF_type_node,
14737 V4SF_type_node, integer_type_node, NULL_TREE);
14738 tree v4sf_ftype_v4sf_int64
14739 = build_function_type_list (V4SF_type_node,
14740 V4SF_type_node, long_long_integer_type_node,
14742 tree v4sf_ftype_v4sf_v2si
14743 = build_function_type_list (V4SF_type_node,
14744 V4SF_type_node, V2SI_type_node, NULL_TREE);
14746 /* Miscellaneous. */
14747 tree v8qi_ftype_v4hi_v4hi
14748 = build_function_type_list (V8QI_type_node,
14749 V4HI_type_node, V4HI_type_node, NULL_TREE);
14750 tree v4hi_ftype_v2si_v2si
14751 = build_function_type_list (V4HI_type_node,
14752 V2SI_type_node, V2SI_type_node, NULL_TREE);
14753 tree v4sf_ftype_v4sf_v4sf_int
14754 = build_function_type_list (V4SF_type_node,
14755 V4SF_type_node, V4SF_type_node,
14756 integer_type_node, NULL_TREE);
14757 tree v2si_ftype_v4hi_v4hi
14758 = build_function_type_list (V2SI_type_node,
14759 V4HI_type_node, V4HI_type_node, NULL_TREE);
14760 tree v4hi_ftype_v4hi_int
14761 = build_function_type_list (V4HI_type_node,
14762 V4HI_type_node, integer_type_node, NULL_TREE);
14763 tree v4hi_ftype_v4hi_di
14764 = build_function_type_list (V4HI_type_node,
14765 V4HI_type_node, long_long_unsigned_type_node,
14767 tree v2si_ftype_v2si_di
14768 = build_function_type_list (V2SI_type_node,
14769 V2SI_type_node, long_long_unsigned_type_node,
14771 tree void_ftype_void
14772 = build_function_type (void_type_node, void_list_node);
14773 tree void_ftype_unsigned
14774 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
14775 tree void_ftype_unsigned_unsigned
14776 = build_function_type_list (void_type_node, unsigned_type_node,
14777 unsigned_type_node, NULL_TREE);
14778 tree void_ftype_pcvoid_unsigned_unsigned
14779 = build_function_type_list (void_type_node, const_ptr_type_node,
14780 unsigned_type_node, unsigned_type_node,
14782 tree unsigned_ftype_void
14783 = build_function_type (unsigned_type_node, void_list_node);
14784 tree v2si_ftype_v4sf
14785 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
14786 /* Loads/stores. */
14787 tree void_ftype_v8qi_v8qi_pchar
14788 = build_function_type_list (void_type_node,
14789 V8QI_type_node, V8QI_type_node,
14790 pchar_type_node, NULL_TREE);
14791 tree v4sf_ftype_pcfloat
14792 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
14793 /* @@@ the type is bogus */
14794 tree v4sf_ftype_v4sf_pv2si
14795 = build_function_type_list (V4SF_type_node,
14796 V4SF_type_node, pv2si_type_node, NULL_TREE);
14797 tree void_ftype_pv2si_v4sf
14798 = build_function_type_list (void_type_node,
14799 pv2si_type_node, V4SF_type_node, NULL_TREE);
14800 tree void_ftype_pfloat_v4sf
14801 = build_function_type_list (void_type_node,
14802 pfloat_type_node, V4SF_type_node, NULL_TREE);
14803 tree void_ftype_pdi_di
14804 = build_function_type_list (void_type_node,
14805 pdi_type_node, long_long_unsigned_type_node,
14807 tree void_ftype_pv2di_v2di
14808 = build_function_type_list (void_type_node,
14809 pv2di_type_node, V2DI_type_node, NULL_TREE);
14810 /* Normal vector unops. */
14811 tree v4sf_ftype_v4sf
14812 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14814 /* Normal vector binops. */
14815 tree v4sf_ftype_v4sf_v4sf
14816 = build_function_type_list (V4SF_type_node,
14817 V4SF_type_node, V4SF_type_node, NULL_TREE);
14818 tree v8qi_ftype_v8qi_v8qi
14819 = build_function_type_list (V8QI_type_node,
14820 V8QI_type_node, V8QI_type_node, NULL_TREE);
14821 tree v4hi_ftype_v4hi_v4hi
14822 = build_function_type_list (V4HI_type_node,
14823 V4HI_type_node, V4HI_type_node, NULL_TREE);
14824 tree v2si_ftype_v2si_v2si
14825 = build_function_type_list (V2SI_type_node,
14826 V2SI_type_node, V2SI_type_node, NULL_TREE);
14827 tree di_ftype_di_di
14828 = build_function_type_list (long_long_unsigned_type_node,
14829 long_long_unsigned_type_node,
14830 long_long_unsigned_type_node, NULL_TREE);
14832 tree v2si_ftype_v2sf
14833 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
14834 tree v2sf_ftype_v2si
14835 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
14836 tree v2si_ftype_v2si
14837 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
14838 tree v2sf_ftype_v2sf
14839 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
14840 tree v2sf_ftype_v2sf_v2sf
14841 = build_function_type_list (V2SF_type_node,
14842 V2SF_type_node, V2SF_type_node, NULL_TREE);
14843 tree v2si_ftype_v2sf_v2sf
14844 = build_function_type_list (V2SI_type_node,
14845 V2SF_type_node, V2SF_type_node, NULL_TREE);
14846 tree pint_type_node = build_pointer_type (integer_type_node);
14847 tree pdouble_type_node = build_pointer_type (double_type_node);
14848 tree pcdouble_type_node = build_pointer_type (
14849 build_type_variant (double_type_node, 1, 0));
14850 tree int_ftype_v2df_v2df
14851 = build_function_type_list (integer_type_node,
14852 V2DF_type_node, V2DF_type_node, NULL_TREE);
14854 tree void_ftype_pcvoid
14855 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
14856 tree v4sf_ftype_v4si
14857 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
14858 tree v4si_ftype_v4sf
14859 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
14860 tree v2df_ftype_v4si
14861 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
14862 tree v4si_ftype_v2df
14863 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
14864 tree v2si_ftype_v2df
14865 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
14866 tree v4sf_ftype_v2df
14867 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
14868 tree v2df_ftype_v2si
14869 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
14870 tree v2df_ftype_v4sf
14871 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
14872 tree int_ftype_v2df
14873 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
14874 tree int64_ftype_v2df
14875 = build_function_type_list (long_long_integer_type_node,
14876 V2DF_type_node, NULL_TREE);
14877 tree v2df_ftype_v2df_int
14878 = build_function_type_list (V2DF_type_node,
14879 V2DF_type_node, integer_type_node, NULL_TREE);
14880 tree v2df_ftype_v2df_int64
14881 = build_function_type_list (V2DF_type_node,
14882 V2DF_type_node, long_long_integer_type_node,
14884 tree v4sf_ftype_v4sf_v2df
14885 = build_function_type_list (V4SF_type_node,
14886 V4SF_type_node, V2DF_type_node, NULL_TREE);
14887 tree v2df_ftype_v2df_v4sf
14888 = build_function_type_list (V2DF_type_node,
14889 V2DF_type_node, V4SF_type_node, NULL_TREE);
14890 tree v2df_ftype_v2df_v2df_int
14891 = build_function_type_list (V2DF_type_node,
14892 V2DF_type_node, V2DF_type_node,
14895 tree v2df_ftype_v2df_pcdouble
14896 = build_function_type_list (V2DF_type_node,
14897 V2DF_type_node, pcdouble_type_node, NULL_TREE);
14898 tree void_ftype_pdouble_v2df
14899 = build_function_type_list (void_type_node,
14900 pdouble_type_node, V2DF_type_node, NULL_TREE);
14901 tree void_ftype_pint_int
14902 = build_function_type_list (void_type_node,
14903 pint_type_node, integer_type_node, NULL_TREE);
14904 tree void_ftype_v16qi_v16qi_pchar
14905 = build_function_type_list (void_type_node,
14906 V16QI_type_node, V16QI_type_node,
14907 pchar_type_node, NULL_TREE);
14908 tree v2df_ftype_pcdouble
14909 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
14910 tree v2df_ftype_v2df_v2df
14911 = build_function_type_list (V2DF_type_node,
14912 V2DF_type_node, V2DF_type_node, NULL_TREE);
14913 tree v16qi_ftype_v16qi_v16qi
14914 = build_function_type_list (V16QI_type_node,
14915 V16QI_type_node, V16QI_type_node, NULL_TREE);
14916 tree v8hi_ftype_v8hi_v8hi
14917 = build_function_type_list (V8HI_type_node,
14918 V8HI_type_node, V8HI_type_node, NULL_TREE);
14919 tree v4si_ftype_v4si_v4si
14920 = build_function_type_list (V4SI_type_node,
14921 V4SI_type_node, V4SI_type_node, NULL_TREE);
14922 tree v2di_ftype_v2di_v2di
14923 = build_function_type_list (V2DI_type_node,
14924 V2DI_type_node, V2DI_type_node, NULL_TREE);
14925 tree v2di_ftype_v2df_v2df
14926 = build_function_type_list (V2DI_type_node,
14927 V2DF_type_node, V2DF_type_node, NULL_TREE);
14928 tree v2df_ftype_v2df
14929 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14930 tree v2di_ftype_v2di_int
14931 = build_function_type_list (V2DI_type_node,
14932 V2DI_type_node, integer_type_node, NULL_TREE);
14933 tree v4si_ftype_v4si_int
14934 = build_function_type_list (V4SI_type_node,
14935 V4SI_type_node, integer_type_node, NULL_TREE);
14936 tree v8hi_ftype_v8hi_int
14937 = build_function_type_list (V8HI_type_node,
14938 V8HI_type_node, integer_type_node, NULL_TREE);
14939 tree v8hi_ftype_v8hi_v2di
14940 = build_function_type_list (V8HI_type_node,
14941 V8HI_type_node, V2DI_type_node, NULL_TREE);
14942 tree v4si_ftype_v4si_v2di
14943 = build_function_type_list (V4SI_type_node,
14944 V4SI_type_node, V2DI_type_node, NULL_TREE);
14945 tree v4si_ftype_v8hi_v8hi
14946 = build_function_type_list (V4SI_type_node,
14947 V8HI_type_node, V8HI_type_node, NULL_TREE);
14948 tree di_ftype_v8qi_v8qi
14949 = build_function_type_list (long_long_unsigned_type_node,
14950 V8QI_type_node, V8QI_type_node, NULL_TREE);
14951 tree di_ftype_v2si_v2si
14952 = build_function_type_list (long_long_unsigned_type_node,
14953 V2SI_type_node, V2SI_type_node, NULL_TREE);
14954 tree v2di_ftype_v16qi_v16qi
14955 = build_function_type_list (V2DI_type_node,
14956 V16QI_type_node, V16QI_type_node, NULL_TREE);
14957 tree v2di_ftype_v4si_v4si
14958 = build_function_type_list (V2DI_type_node,
14959 V4SI_type_node, V4SI_type_node, NULL_TREE);
14960 tree int_ftype_v16qi
14961 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
14962 tree v16qi_ftype_pcchar
14963 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
14964 tree void_ftype_pchar_v16qi
14965 = build_function_type_list (void_type_node,
14966 pchar_type_node, V16QI_type_node, NULL_TREE);
14969 tree float128_type;
14972 /* The __float80 type. */
14973 if (TYPE_MODE (long_double_type_node) == XFmode)
14974 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
14978 /* The __float80 type. */
14979 float80_type = make_node (REAL_TYPE);
14980 TYPE_PRECISION (float80_type) = 80;
14981 layout_type (float80_type);
14982 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
14987 float128_type = make_node (REAL_TYPE);
14988 TYPE_PRECISION (float128_type) = 128;
14989 layout_type (float128_type);
14990 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
14993 /* Add all builtins that are more or less simple operations on two
14995 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14997 /* Use one of the operands; the target can have a different mode for
14998 mask-generating compares. */
14999 enum machine_mode mode;
15004 mode = insn_data[d->icode].operand[1].mode;
15009 type = v16qi_ftype_v16qi_v16qi;
15012 type = v8hi_ftype_v8hi_v8hi;
15015 type = v4si_ftype_v4si_v4si;
15018 type = v2di_ftype_v2di_v2di;
15021 type = v2df_ftype_v2df_v2df;
15024 type = v4sf_ftype_v4sf_v4sf;
15027 type = v8qi_ftype_v8qi_v8qi;
15030 type = v4hi_ftype_v4hi_v4hi;
15033 type = v2si_ftype_v2si_v2si;
15036 type = di_ftype_di_di;
15040 gcc_unreachable ();
15043 /* Override for comparisons. */
15044 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15045 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15046 type = v4si_ftype_v4sf_v4sf;
15048 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15049 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15050 type = v2di_ftype_v2df_v2df;
15052 def_builtin (d->mask, d->name, type, d->code);
15055 /* Add the remaining MMX insns with somewhat more complicated types. */
15056 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15057 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15058 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15059 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15061 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15062 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15063 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15065 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15066 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15068 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15069 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15071 /* comi/ucomi insns. */
15072 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15073 if (d->mask == MASK_SSE2)
15074 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15076 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15078 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15079 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15080 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15082 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15083 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15084 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15085 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15086 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15087 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15088 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15089 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15090 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15091 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15092 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15094 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15096 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15097 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15099 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15100 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15101 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15102 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15104 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15105 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15106 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15107 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15109 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15111 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15113 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15114 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15115 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15116 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15117 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15118 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15120 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15122 /* Original 3DNow! */
15123 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15124 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15125 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15126 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15127 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15128 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15129 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15130 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15131 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15132 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15133 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15134 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15135 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15136 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15137 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15138 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15139 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15140 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15141 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15142 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15144 /* 3DNow! extension as used in the Athlon CPU. */
15145 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15146 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15147 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15148 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15149 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15150 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15153 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15155 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15156 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15158 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15159 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15161 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15162 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15163 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15164 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15165 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15167 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15168 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15169 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15170 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15172 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15173 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15175 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15177 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15178 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15180 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15181 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15182 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15183 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15184 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15186 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15188 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15189 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15190 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15191 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15193 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15194 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15195 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15197 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15198 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15199 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15200 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15202 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15203 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15204 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15206 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15207 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15209 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15210 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15212 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15213 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15214 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15216 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15217 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15218 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15220 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15221 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15223 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15224 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15225 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15226 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15228 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15229 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15230 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15231 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15233 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15234 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15236 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15238 /* Prescott New Instructions. */
15239 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15240 void_ftype_pcvoid_unsigned_unsigned,
15241 IX86_BUILTIN_MONITOR);
15242 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15243 void_ftype_unsigned_unsigned,
15244 IX86_BUILTIN_MWAIT);
15245 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15247 IX86_BUILTIN_MOVSHDUP);
15248 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15250 IX86_BUILTIN_MOVSLDUP);
15251 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15252 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15254 /* Access to the vec_init patterns. */
15255 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15256 integer_type_node, NULL_TREE);
15257 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15258 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15260 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15261 short_integer_type_node,
15262 short_integer_type_node,
15263 short_integer_type_node, NULL_TREE);
15264 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15265 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15267 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15268 char_type_node, char_type_node,
15269 char_type_node, char_type_node,
15270 char_type_node, char_type_node,
15271 char_type_node, NULL_TREE);
15272 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15273 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15275 /* Access to the vec_extract patterns. */
15276 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15277 integer_type_node, NULL_TREE);
15278 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15279 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15281 ftype = build_function_type_list (long_long_integer_type_node,
15282 V2DI_type_node, integer_type_node,
15284 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15285 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15287 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15288 integer_type_node, NULL_TREE);
15289 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15290 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15292 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15293 integer_type_node, NULL_TREE);
15294 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15295 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15297 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15298 integer_type_node, NULL_TREE);
15299 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15300 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15302 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15303 integer_type_node, NULL_TREE);
15304 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15305 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15307 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15308 integer_type_node, NULL_TREE);
15309 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15310 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15312 /* Access to the vec_set patterns. */
15313 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15315 integer_type_node, NULL_TREE);
15316 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15317 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15319 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15321 integer_type_node, NULL_TREE);
15322 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15323 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15327 /* Set up all the SSE ABI builtins that we may use to override
15328 the normal builtins. */
15330 ix86_init_sse_abi_builtins (void)
15332 tree dbl, flt, dbl2, flt2;
15334 /* Bail out in case the template definitions are not available. */
15335 if (! built_in_decls [BUILT_IN_SIN]
15336 || ! built_in_decls [BUILT_IN_SINF]
15337 || ! built_in_decls [BUILT_IN_ATAN2]
15338 || ! built_in_decls [BUILT_IN_ATAN2F])
15341 /* Build the function types as variants of the existing ones. */
15342 dbl = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_SIN]));
15343 TYPE_ATTRIBUTES (dbl)
15344 = tree_cons (get_identifier ("sseregparm"),
15345 NULL_TREE, TYPE_ATTRIBUTES (dbl));
15346 flt = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_SINF]));
15347 TYPE_ATTRIBUTES (flt)
15348 = tree_cons (get_identifier ("sseregparm"),
15349 NULL_TREE, TYPE_ATTRIBUTES (flt));
15350 dbl2 = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_ATAN2]));
15351 TYPE_ATTRIBUTES (dbl2)
15352 = tree_cons (get_identifier ("sseregparm"),
15353 NULL_TREE, TYPE_ATTRIBUTES (dbl2));
15354 flt2 = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_ATAN2F]));
15355 TYPE_ATTRIBUTES (flt2)
15356 = tree_cons (get_identifier ("sseregparm"),
15357 NULL_TREE, TYPE_ATTRIBUTES (flt2));
15359 #define def_builtin(capname, name, type) \
15360 ix86_builtin_function_variants [BUILT_IN_ ## capname] \
15361 = lang_hooks.builtin_function ("__builtin_sse2_" # name, type, \
15362 IX86_BUILTIN_SSE2_ ## capname, \
15364 "__libm_sse2_" # name, NULL_TREE)
15366 def_builtin (ACOS, acos, dbl);
15367 def_builtin (ACOSF, acosf, flt);
15368 def_builtin (ASIN, asin, dbl);
15369 def_builtin (ASINF, asinf, flt);
15370 def_builtin (ATAN, atan, dbl);
15371 def_builtin (ATANF, atanf, flt);
15372 def_builtin (ATAN2, atan2, dbl2);
15373 def_builtin (ATAN2F, atan2f, flt2);
15374 def_builtin (COS, cos, dbl);
15375 def_builtin (COSF, cosf, flt);
15376 def_builtin (EXP, exp, dbl);
15377 def_builtin (EXPF, expf, flt);
15378 def_builtin (LOG10, log10, dbl);
15379 def_builtin (LOG10F, log10f, flt);
15380 def_builtin (LOG, log, dbl);
15381 def_builtin (LOGF, logf, flt);
15382 def_builtin (SIN, sin, dbl);
15383 def_builtin (SINF, sinf, flt);
15384 def_builtin (TAN, tan, dbl);
15385 def_builtin (TANF, tanf, flt);
15390 /* Errors in the source file can cause expand_expr to return const0_rtx
15391 where we expect a vector. To avoid crashing, use one of the vector
15392 clear instructions. */
15394 safe_vector_operand (rtx x, enum machine_mode mode)
15396 if (x == const0_rtx)
15397 x = CONST0_RTX (mode);
15401 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15404 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15407 tree arg0 = TREE_VALUE (arglist);
15408 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15409 rtx op0 = expand_normal (arg0);
15410 rtx op1 = expand_normal (arg1);
15411 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15412 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15413 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15415 if (VECTOR_MODE_P (mode0))
15416 op0 = safe_vector_operand (op0, mode0);
15417 if (VECTOR_MODE_P (mode1))
15418 op1 = safe_vector_operand (op1, mode1);
15420 if (optimize || !target
15421 || GET_MODE (target) != tmode
15422 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15423 target = gen_reg_rtx (tmode);
15425 if (GET_MODE (op1) == SImode && mode1 == TImode)
15427 rtx x = gen_reg_rtx (V4SImode);
15428 emit_insn (gen_sse2_loadd (x, op1));
15429 op1 = gen_lowpart (TImode, x);
15432 /* The insn must want input operands in the same modes as the
15434 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15435 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15437 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15438 op0 = copy_to_mode_reg (mode0, op0);
15439 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15440 op1 = copy_to_mode_reg (mode1, op1);
15442 /* ??? Using ix86_fixup_binary_operands is problematic when
15443 we've got mismatched modes. Fake it. */
15449 if (tmode == mode0 && tmode == mode1)
15451 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15455 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15457 op0 = force_reg (mode0, op0);
15458 op1 = force_reg (mode1, op1);
15459 target = gen_reg_rtx (tmode);
15462 pat = GEN_FCN (icode) (target, op0, op1);
15469 /* Subroutine of ix86_expand_builtin to take care of stores. */
15472 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15475 tree arg0 = TREE_VALUE (arglist);
15476 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15477 rtx op0 = expand_normal (arg0);
15478 rtx op1 = expand_normal (arg1);
15479 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15480 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15482 if (VECTOR_MODE_P (mode1))
15483 op1 = safe_vector_operand (op1, mode1);
15485 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15486 op1 = copy_to_mode_reg (mode1, op1);
15488 pat = GEN_FCN (icode) (op0, op1);
15494 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15497 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15498 rtx target, int do_load)
15501 tree arg0 = TREE_VALUE (arglist);
15502 rtx op0 = expand_normal (arg0);
15503 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15504 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15506 if (optimize || !target
15507 || GET_MODE (target) != tmode
15508 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15509 target = gen_reg_rtx (tmode);
15511 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15514 if (VECTOR_MODE_P (mode0))
15515 op0 = safe_vector_operand (op0, mode0);
15517 if ((optimize && !register_operand (op0, mode0))
15518 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15519 op0 = copy_to_mode_reg (mode0, op0);
15522 pat = GEN_FCN (icode) (target, op0);
15529 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15530 sqrtss, rsqrtss, rcpss. */
15533 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15536 tree arg0 = TREE_VALUE (arglist);
15537 rtx op1, op0 = expand_normal (arg0);
15538 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15539 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15541 if (optimize || !target
15542 || GET_MODE (target) != tmode
15543 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15544 target = gen_reg_rtx (tmode);
15546 if (VECTOR_MODE_P (mode0))
15547 op0 = safe_vector_operand (op0, mode0);
15549 if ((optimize && !register_operand (op0, mode0))
15550 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15551 op0 = copy_to_mode_reg (mode0, op0);
15554 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15555 op1 = copy_to_mode_reg (mode0, op1);
15557 pat = GEN_FCN (icode) (target, op0, op1);
15564 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15567 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15571 tree arg0 = TREE_VALUE (arglist);
15572 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15573 rtx op0 = expand_normal (arg0);
15574 rtx op1 = expand_normal (arg1);
15576 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15577 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15578 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15579 enum rtx_code comparison = d->comparison;
15581 if (VECTOR_MODE_P (mode0))
15582 op0 = safe_vector_operand (op0, mode0);
15583 if (VECTOR_MODE_P (mode1))
15584 op1 = safe_vector_operand (op1, mode1);
15586 /* Swap operands if we have a comparison that isn't available in
15588 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15590 rtx tmp = gen_reg_rtx (mode1);
15591 emit_move_insn (tmp, op1);
15596 if (optimize || !target
15597 || GET_MODE (target) != tmode
15598 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15599 target = gen_reg_rtx (tmode);
15601 if ((optimize && !register_operand (op0, mode0))
15602 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15603 op0 = copy_to_mode_reg (mode0, op0);
15604 if ((optimize && !register_operand (op1, mode1))
15605 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15606 op1 = copy_to_mode_reg (mode1, op1);
15608 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15609 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15616 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15619 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15623 tree arg0 = TREE_VALUE (arglist);
15624 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15625 rtx op0 = expand_normal (arg0);
15626 rtx op1 = expand_normal (arg1);
15628 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15629 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15630 enum rtx_code comparison = d->comparison;
15632 if (VECTOR_MODE_P (mode0))
15633 op0 = safe_vector_operand (op0, mode0);
15634 if (VECTOR_MODE_P (mode1))
15635 op1 = safe_vector_operand (op1, mode1);
15637 /* Swap operands if we have a comparison that isn't available in
15639 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15646 target = gen_reg_rtx (SImode);
15647 emit_move_insn (target, const0_rtx);
15648 target = gen_rtx_SUBREG (QImode, target, 0);
15650 if ((optimize && !register_operand (op0, mode0))
15651 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15652 op0 = copy_to_mode_reg (mode0, op0);
15653 if ((optimize && !register_operand (op1, mode1))
15654 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15655 op1 = copy_to_mode_reg (mode1, op1);
15657 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15658 pat = GEN_FCN (d->icode) (op0, op1);
15662 emit_insn (gen_rtx_SET (VOIDmode,
15663 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15664 gen_rtx_fmt_ee (comparison, QImode,
15668 return SUBREG_REG (target);
15671 /* Return the integer constant in ARG. Constrain it to be in the range
15672 of the subparts of VEC_TYPE; issue an error if not. */
15675 get_element_number (tree vec_type, tree arg)
15677 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15679 if (!host_integerp (arg, 1)
15680 || (elt = tree_low_cst (arg, 1), elt > max))
15682 error ("selector must be an integer constant in the range 0..%wi", max);
15689 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15690 ix86_expand_vector_init. We DO have language-level syntax for this, in
15691 the form of (type){ init-list }. Except that since we can't place emms
15692 instructions from inside the compiler, we can't allow the use of MMX
15693 registers unless the user explicitly asks for it. So we do *not* define
15694 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15695 we have builtins invoked by mmintrin.h that gives us license to emit
15696 these sorts of instructions. */
15699 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15701 enum machine_mode tmode = TYPE_MODE (type);
15702 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15703 int i, n_elt = GET_MODE_NUNITS (tmode);
15704 rtvec v = rtvec_alloc (n_elt);
15706 gcc_assert (VECTOR_MODE_P (tmode));
15708 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15710 rtx x = expand_normal (TREE_VALUE (arglist));
15711 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15714 gcc_assert (arglist == NULL);
15716 if (!target || !register_operand (target, tmode))
15717 target = gen_reg_rtx (tmode);
15719 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15723 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15724 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15725 had a language-level syntax for referencing vector elements. */
15728 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15730 enum machine_mode tmode, mode0;
15735 arg0 = TREE_VALUE (arglist);
15736 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15738 op0 = expand_normal (arg0);
15739 elt = get_element_number (TREE_TYPE (arg0), arg1);
15741 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15742 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15743 gcc_assert (VECTOR_MODE_P (mode0));
15745 op0 = force_reg (mode0, op0);
15747 if (optimize || !target || !register_operand (target, tmode))
15748 target = gen_reg_rtx (tmode);
15750 ix86_expand_vector_extract (true, target, op0, elt);
15755 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15756 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15757 a language-level syntax for referencing vector elements. */
15760 ix86_expand_vec_set_builtin (tree arglist)
15762 enum machine_mode tmode, mode1;
15763 tree arg0, arg1, arg2;
15767 arg0 = TREE_VALUE (arglist);
15768 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15769 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15771 tmode = TYPE_MODE (TREE_TYPE (arg0));
15772 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15773 gcc_assert (VECTOR_MODE_P (tmode));
15775 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15776 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15777 elt = get_element_number (TREE_TYPE (arg0), arg2);
15779 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15780 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15782 op0 = force_reg (tmode, op0);
15783 op1 = force_reg (mode1, op1);
15785 ix86_expand_vector_set (true, op0, op1, elt);
15790 /* Expand an expression EXP that calls a built-in function,
15791 with result going to TARGET if that's convenient
15792 (and in mode MODE if that's convenient).
15793 SUBTARGET may be used as the target for computing one of EXP's operands.
15794 IGNORE is nonzero if the value is to be ignored. */
15797 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15798 enum machine_mode mode ATTRIBUTE_UNUSED,
15799 int ignore ATTRIBUTE_UNUSED)
15801 const struct builtin_description *d;
15803 enum insn_code icode;
15804 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
15805 tree arglist = TREE_OPERAND (exp, 1);
15806 tree arg0, arg1, arg2;
15807 rtx op0, op1, op2, pat;
15808 enum machine_mode tmode, mode0, mode1, mode2;
15809 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15813 case IX86_BUILTIN_EMMS:
15814 emit_insn (gen_mmx_emms ());
15817 case IX86_BUILTIN_SFENCE:
15818 emit_insn (gen_sse_sfence ());
15821 case IX86_BUILTIN_MASKMOVQ:
15822 case IX86_BUILTIN_MASKMOVDQU:
15823 icode = (fcode == IX86_BUILTIN_MASKMOVQ
15824 ? CODE_FOR_mmx_maskmovq
15825 : CODE_FOR_sse2_maskmovdqu);
15826 /* Note the arg order is different from the operand order. */
15827 arg1 = TREE_VALUE (arglist);
15828 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
15829 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15830 op0 = expand_normal (arg0);
15831 op1 = expand_normal (arg1);
15832 op2 = expand_normal (arg2);
15833 mode0 = insn_data[icode].operand[0].mode;
15834 mode1 = insn_data[icode].operand[1].mode;
15835 mode2 = insn_data[icode].operand[2].mode;
15837 op0 = force_reg (Pmode, op0);
15838 op0 = gen_rtx_MEM (mode1, op0);
15840 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15841 op0 = copy_to_mode_reg (mode0, op0);
15842 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15843 op1 = copy_to_mode_reg (mode1, op1);
15844 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
15845 op2 = copy_to_mode_reg (mode2, op2);
15846 pat = GEN_FCN (icode) (op0, op1, op2);
15852 case IX86_BUILTIN_SQRTSS:
15853 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
15854 case IX86_BUILTIN_RSQRTSS:
15855 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
15856 case IX86_BUILTIN_RCPSS:
15857 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
15859 case IX86_BUILTIN_LOADUPS:
15860 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
15862 case IX86_BUILTIN_STOREUPS:
15863 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
15865 case IX86_BUILTIN_LOADHPS:
15866 case IX86_BUILTIN_LOADLPS:
15867 case IX86_BUILTIN_LOADHPD:
15868 case IX86_BUILTIN_LOADLPD:
15869 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
15870 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
15871 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
15872 : CODE_FOR_sse2_loadlpd);
15873 arg0 = TREE_VALUE (arglist);
15874 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15875 op0 = expand_normal (arg0);
15876 op1 = expand_normal (arg1);
15877 tmode = insn_data[icode].operand[0].mode;
15878 mode0 = insn_data[icode].operand[1].mode;
15879 mode1 = insn_data[icode].operand[2].mode;
15881 op0 = force_reg (mode0, op0);
15882 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
15883 if (optimize || target == 0
15884 || GET_MODE (target) != tmode
15885 || !register_operand (target, tmode))
15886 target = gen_reg_rtx (tmode);
15887 pat = GEN_FCN (icode) (target, op0, op1);
15893 case IX86_BUILTIN_STOREHPS:
15894 case IX86_BUILTIN_STORELPS:
15895 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
15896 : CODE_FOR_sse_storelps);
15897 arg0 = TREE_VALUE (arglist);
15898 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15899 op0 = expand_normal (arg0);
15900 op1 = expand_normal (arg1);
15901 mode0 = insn_data[icode].operand[0].mode;
15902 mode1 = insn_data[icode].operand[1].mode;
15904 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15905 op1 = force_reg (mode1, op1);
15907 pat = GEN_FCN (icode) (op0, op1);
15913 case IX86_BUILTIN_MOVNTPS:
15914 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
15915 case IX86_BUILTIN_MOVNTQ:
15916 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
15918 case IX86_BUILTIN_LDMXCSR:
15919 op0 = expand_normal (TREE_VALUE (arglist));
15920 target = assign_386_stack_local (SImode, SLOT_TEMP);
15921 emit_move_insn (target, op0);
15922 emit_insn (gen_sse_ldmxcsr (target));
15925 case IX86_BUILTIN_STMXCSR:
15926 target = assign_386_stack_local (SImode, SLOT_TEMP);
15927 emit_insn (gen_sse_stmxcsr (target));
15928 return copy_to_mode_reg (SImode, target);
15930 case IX86_BUILTIN_SHUFPS:
15931 case IX86_BUILTIN_SHUFPD:
15932 icode = (fcode == IX86_BUILTIN_SHUFPS
15933 ? CODE_FOR_sse_shufps
15934 : CODE_FOR_sse2_shufpd);
15935 arg0 = TREE_VALUE (arglist);
15936 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15937 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15938 op0 = expand_normal (arg0);
15939 op1 = expand_normal (arg1);
15940 op2 = expand_normal (arg2);
15941 tmode = insn_data[icode].operand[0].mode;
15942 mode0 = insn_data[icode].operand[1].mode;
15943 mode1 = insn_data[icode].operand[2].mode;
15944 mode2 = insn_data[icode].operand[3].mode;
15946 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15947 op0 = copy_to_mode_reg (mode0, op0);
15948 if ((optimize && !register_operand (op1, mode1))
15949 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
15950 op1 = copy_to_mode_reg (mode1, op1);
15951 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15953 /* @@@ better error message */
15954 error ("mask must be an immediate");
15955 return gen_reg_rtx (tmode);
15957 if (optimize || target == 0
15958 || GET_MODE (target) != tmode
15959 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15960 target = gen_reg_rtx (tmode);
15961 pat = GEN_FCN (icode) (target, op0, op1, op2);
15967 case IX86_BUILTIN_PSHUFW:
15968 case IX86_BUILTIN_PSHUFD:
15969 case IX86_BUILTIN_PSHUFHW:
15970 case IX86_BUILTIN_PSHUFLW:
15971 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
15972 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
15973 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
15974 : CODE_FOR_mmx_pshufw);
15975 arg0 = TREE_VALUE (arglist);
15976 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15977 op0 = expand_normal (arg0);
15978 op1 = expand_normal (arg1);
15979 tmode = insn_data[icode].operand[0].mode;
15980 mode1 = insn_data[icode].operand[1].mode;
15981 mode2 = insn_data[icode].operand[2].mode;
15983 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15984 op0 = copy_to_mode_reg (mode1, op0);
15985 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15987 /* @@@ better error message */
15988 error ("mask must be an immediate");
15992 || GET_MODE (target) != tmode
15993 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15994 target = gen_reg_rtx (tmode);
15995 pat = GEN_FCN (icode) (target, op0, op1);
16001 case IX86_BUILTIN_PSLLDQI128:
16002 case IX86_BUILTIN_PSRLDQI128:
16003 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16004 : CODE_FOR_sse2_lshrti3);
16005 arg0 = TREE_VALUE (arglist);
16006 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16007 op0 = expand_normal (arg0);
16008 op1 = expand_normal (arg1);
16009 tmode = insn_data[icode].operand[0].mode;
16010 mode1 = insn_data[icode].operand[1].mode;
16011 mode2 = insn_data[icode].operand[2].mode;
16013 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16015 op0 = copy_to_reg (op0);
16016 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16018 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16020 error ("shift must be an immediate");
16023 target = gen_reg_rtx (V2DImode);
16024 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16030 case IX86_BUILTIN_FEMMS:
16031 emit_insn (gen_mmx_femms ());
16034 case IX86_BUILTIN_PAVGUSB:
16035 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16037 case IX86_BUILTIN_PF2ID:
16038 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16040 case IX86_BUILTIN_PFACC:
16041 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16043 case IX86_BUILTIN_PFADD:
16044 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16046 case IX86_BUILTIN_PFCMPEQ:
16047 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16049 case IX86_BUILTIN_PFCMPGE:
16050 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16052 case IX86_BUILTIN_PFCMPGT:
16053 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16055 case IX86_BUILTIN_PFMAX:
16056 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16058 case IX86_BUILTIN_PFMIN:
16059 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16061 case IX86_BUILTIN_PFMUL:
16062 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16064 case IX86_BUILTIN_PFRCP:
16065 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16067 case IX86_BUILTIN_PFRCPIT1:
16068 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16070 case IX86_BUILTIN_PFRCPIT2:
16071 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16073 case IX86_BUILTIN_PFRSQIT1:
16074 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16076 case IX86_BUILTIN_PFRSQRT:
16077 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16079 case IX86_BUILTIN_PFSUB:
16080 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16082 case IX86_BUILTIN_PFSUBR:
16083 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16085 case IX86_BUILTIN_PI2FD:
16086 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16088 case IX86_BUILTIN_PMULHRW:
16089 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16091 case IX86_BUILTIN_PF2IW:
16092 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16094 case IX86_BUILTIN_PFNACC:
16095 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16097 case IX86_BUILTIN_PFPNACC:
16098 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16100 case IX86_BUILTIN_PI2FW:
16101 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16103 case IX86_BUILTIN_PSWAPDSI:
16104 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16106 case IX86_BUILTIN_PSWAPDSF:
16107 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16109 case IX86_BUILTIN_SQRTSD:
16110 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16111 case IX86_BUILTIN_LOADUPD:
16112 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16113 case IX86_BUILTIN_STOREUPD:
16114 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16116 case IX86_BUILTIN_MFENCE:
16117 emit_insn (gen_sse2_mfence ());
16119 case IX86_BUILTIN_LFENCE:
16120 emit_insn (gen_sse2_lfence ());
16123 case IX86_BUILTIN_CLFLUSH:
16124 arg0 = TREE_VALUE (arglist);
16125 op0 = expand_normal (arg0);
16126 icode = CODE_FOR_sse2_clflush;
16127 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16128 op0 = copy_to_mode_reg (Pmode, op0);
16130 emit_insn (gen_sse2_clflush (op0));
16133 case IX86_BUILTIN_MOVNTPD:
16134 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16135 case IX86_BUILTIN_MOVNTDQ:
16136 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16137 case IX86_BUILTIN_MOVNTI:
16138 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16140 case IX86_BUILTIN_LOADDQU:
16141 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16142 case IX86_BUILTIN_STOREDQU:
16143 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16145 case IX86_BUILTIN_MONITOR:
16146 arg0 = TREE_VALUE (arglist);
16147 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16148 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16149 op0 = expand_normal (arg0);
16150 op1 = expand_normal (arg1);
16151 op2 = expand_normal (arg2);
16153 op0 = copy_to_mode_reg (SImode, op0);
16155 op1 = copy_to_mode_reg (SImode, op1);
16157 op2 = copy_to_mode_reg (SImode, op2);
16158 emit_insn (gen_sse3_monitor (op0, op1, op2));
16161 case IX86_BUILTIN_MWAIT:
16162 arg0 = TREE_VALUE (arglist);
16163 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16164 op0 = expand_normal (arg0);
16165 op1 = expand_normal (arg1);
16167 op0 = copy_to_mode_reg (SImode, op0);
16169 op1 = copy_to_mode_reg (SImode, op1);
16170 emit_insn (gen_sse3_mwait (op0, op1));
16173 case IX86_BUILTIN_LDDQU:
16174 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16177 case IX86_BUILTIN_VEC_INIT_V2SI:
16178 case IX86_BUILTIN_VEC_INIT_V4HI:
16179 case IX86_BUILTIN_VEC_INIT_V8QI:
16180 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16182 case IX86_BUILTIN_VEC_EXT_V2DF:
16183 case IX86_BUILTIN_VEC_EXT_V2DI:
16184 case IX86_BUILTIN_VEC_EXT_V4SF:
16185 case IX86_BUILTIN_VEC_EXT_V4SI:
16186 case IX86_BUILTIN_VEC_EXT_V8HI:
16187 case IX86_BUILTIN_VEC_EXT_V2SI:
16188 case IX86_BUILTIN_VEC_EXT_V4HI:
16189 return ix86_expand_vec_ext_builtin (arglist, target);
16191 case IX86_BUILTIN_VEC_SET_V8HI:
16192 case IX86_BUILTIN_VEC_SET_V4HI:
16193 return ix86_expand_vec_set_builtin (arglist);
16199 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16200 if (d->code == fcode)
16202 /* Compares are treated specially. */
16203 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16204 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16205 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16206 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16207 return ix86_expand_sse_compare (d, arglist, target);
16209 return ix86_expand_binop_builtin (d->icode, arglist, target);
16212 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16213 if (d->code == fcode)
16214 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16216 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16217 if (d->code == fcode)
16218 return ix86_expand_sse_comi (d, arglist, target);
16220 gcc_unreachable ();
16223 /* Expand an expression EXP that calls a built-in library function,
16224 with result going to TARGET if that's convenient
16225 (and in mode MODE if that's convenient).
16226 SUBTARGET may be used as the target for computing one of EXP's operands.
16227 IGNORE is nonzero if the value is to be ignored. */
16230 ix86_expand_library_builtin (tree exp, rtx target,
16231 rtx subtarget ATTRIBUTE_UNUSED,
16232 enum machine_mode mode ATTRIBUTE_UNUSED,
16235 enum built_in_function fncode;
16236 tree fndecl, newfn, call;
16238 /* Try expanding builtin math functions to the SSE2 ABI variants. */
16239 if (!TARGET_SSELIBM)
16242 fncode = builtin_mathfn_code (exp);
16243 if (!ix86_builtin_function_variants [(int)fncode])
16246 fndecl = get_callee_fndecl (exp);
16247 if (DECL_RTL_SET_P (fndecl))
16250 /* Build the redirected call and expand it. */
16251 newfn = ix86_builtin_function_variants [(int)fncode];
16252 call = build_function_call_expr (newfn, TREE_OPERAND (exp, 1));
16253 return expand_call (call, target, ignore);
16256 /* Store OPERAND to the memory after reload is completed. This means
16257 that we can't easily use assign_stack_local. */
16259 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16263 gcc_assert (reload_completed);
16264 if (TARGET_RED_ZONE)
16266 result = gen_rtx_MEM (mode,
16267 gen_rtx_PLUS (Pmode,
16269 GEN_INT (-RED_ZONE_SIZE)));
16270 emit_move_insn (result, operand);
16272 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16278 operand = gen_lowpart (DImode, operand);
16282 gen_rtx_SET (VOIDmode,
16283 gen_rtx_MEM (DImode,
16284 gen_rtx_PRE_DEC (DImode,
16285 stack_pointer_rtx)),
16289 gcc_unreachable ();
16291 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16300 split_di (&operand, 1, operands, operands + 1);
16302 gen_rtx_SET (VOIDmode,
16303 gen_rtx_MEM (SImode,
16304 gen_rtx_PRE_DEC (Pmode,
16305 stack_pointer_rtx)),
16308 gen_rtx_SET (VOIDmode,
16309 gen_rtx_MEM (SImode,
16310 gen_rtx_PRE_DEC (Pmode,
16311 stack_pointer_rtx)),
16316 /* Store HImodes as SImodes. */
16317 operand = gen_lowpart (SImode, operand);
16321 gen_rtx_SET (VOIDmode,
16322 gen_rtx_MEM (GET_MODE (operand),
16323 gen_rtx_PRE_DEC (SImode,
16324 stack_pointer_rtx)),
16328 gcc_unreachable ();
16330 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16335 /* Free operand from the memory. */
16337 ix86_free_from_memory (enum machine_mode mode)
16339 if (!TARGET_RED_ZONE)
16343 if (mode == DImode || TARGET_64BIT)
16347 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16348 to pop or add instruction if registers are available. */
16349 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16350 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16355 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16356 QImode must go into class Q_REGS.
16357 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16358 movdf to do mem-to-mem moves through integer regs. */
16360 ix86_preferred_reload_class (rtx x, enum reg_class class)
16362 /* We're only allowed to return a subclass of CLASS. Many of the
16363 following checks fail for NO_REGS, so eliminate that early. */
16364 if (class == NO_REGS)
16367 /* All classes can load zeros. */
16368 if (x == CONST0_RTX (GET_MODE (x)))
16371 /* Floating-point constants need more complex checks. */
16372 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16374 /* General regs can load everything. */
16375 if (reg_class_subset_p (class, GENERAL_REGS))
16378 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16379 zero above. We only want to wind up preferring 80387 registers if
16380 we plan on doing computation with them. */
16382 && (TARGET_MIX_SSE_I387
16383 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
16384 && standard_80387_constant_p (x))
16386 /* Limit class to non-sse. */
16387 if (class == FLOAT_SSE_REGS)
16389 if (class == FP_TOP_SSE_REGS)
16391 if (class == FP_SECOND_SSE_REGS)
16392 return FP_SECOND_REG;
16393 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16399 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
16401 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
16404 /* Generally when we see PLUS here, it's the function invariant
16405 (plus soft-fp const_int). Which can only be computed into general
16407 if (GET_CODE (x) == PLUS)
16408 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16410 /* QImode constants are easy to load, but non-constant QImode data
16411 must go into Q_REGS. */
16412 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16414 if (reg_class_subset_p (class, Q_REGS))
16416 if (reg_class_subset_p (Q_REGS, class))
16424 /* If we are copying between general and FP registers, we need a memory
16425 location. The same is true for SSE and MMX registers.
16427 The macro can't work reliably when one of the CLASSES is class containing
16428 registers from multiple units (SSE, MMX, integer). We avoid this by never
16429 combining those units in single alternative in the machine description.
16430 Ensure that this constraint holds to avoid unexpected surprises.
16432 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16433 enforce these sanity checks. */
16436 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16437 enum machine_mode mode, int strict)
16439 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16440 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16441 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16442 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16443 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16444 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16446 gcc_assert (!strict);
16450 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16453 /* ??? This is a lie. We do have moves between mmx/general, and for
16454 mmx/sse2. But by saying we need secondary memory we discourage the
16455 register allocator from using the mmx registers unless needed. */
16456 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16459 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16461 /* SSE1 doesn't have any direct moves from other classes. */
16465 /* If the target says that inter-unit moves are more expensive
16466 than moving through memory, then don't generate them. */
16467 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16470 /* Between SSE and general, we have moves no larger than word size. */
16471 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16474 /* ??? For the cost of one register reformat penalty, we could use
16475 the same instructions to move SFmode and DFmode data, but the
16476 relevant move patterns don't support those alternatives. */
16477 if (mode == SFmode || mode == DFmode)
16484 /* Return true if the registers in CLASS cannot represent the change from
16485 modes FROM to TO. */
16488 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16489 enum reg_class class)
16494 /* x87 registers can't do subreg at all, as all values are reformatted
16495 to extended precision. */
16496 if (MAYBE_FLOAT_CLASS_P (class))
16499 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16501 /* Vector registers do not support QI or HImode loads. If we don't
16502 disallow a change to these modes, reload will assume it's ok to
16503 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16504 the vec_dupv4hi pattern. */
16505 if (GET_MODE_SIZE (from) < 4)
16508 /* Vector registers do not support subreg with nonzero offsets, which
16509 are otherwise valid for integer registers. Since we can't see
16510 whether we have a nonzero offset from here, prohibit all
16511 nonparadoxical subregs changing size. */
16512 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16519 /* Return the cost of moving data from a register in class CLASS1 to
16520 one in class CLASS2.
16522 It is not required that the cost always equal 2 when FROM is the same as TO;
16523 on some machines it is expensive to move between registers if they are not
16524 general registers. */
16527 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16528 enum reg_class class2)
16530 /* In case we require secondary memory, compute cost of the store followed
16531 by load. In order to avoid bad register allocation choices, we need
16532 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16534 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16538 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16539 MEMORY_MOVE_COST (mode, class1, 1));
16540 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16541 MEMORY_MOVE_COST (mode, class2, 1));
16543 /* In case of copying from general_purpose_register we may emit multiple
16544 stores followed by single load causing memory size mismatch stall.
16545 Count this as arbitrarily high cost of 20. */
16546 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16549 /* In the case of FP/MMX moves, the registers actually overlap, and we
16550 have to switch modes in order to treat them differently. */
16551 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16552 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16558 /* Moves between SSE/MMX and integer unit are expensive. */
16559 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16560 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16561 return ix86_cost->mmxsse_to_integer;
16562 if (MAYBE_FLOAT_CLASS_P (class1))
16563 return ix86_cost->fp_move;
16564 if (MAYBE_SSE_CLASS_P (class1))
16565 return ix86_cost->sse_move;
16566 if (MAYBE_MMX_CLASS_P (class1))
16567 return ix86_cost->mmx_move;
16571 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16574 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16576 /* Flags and only flags can only hold CCmode values. */
16577 if (CC_REGNO_P (regno))
16578 return GET_MODE_CLASS (mode) == MODE_CC;
16579 if (GET_MODE_CLASS (mode) == MODE_CC
16580 || GET_MODE_CLASS (mode) == MODE_RANDOM
16581 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16583 if (FP_REGNO_P (regno))
16584 return VALID_FP_MODE_P (mode);
16585 if (SSE_REGNO_P (regno))
16587 /* We implement the move patterns for all vector modes into and
16588 out of SSE registers, even when no operation instructions
16590 return (VALID_SSE_REG_MODE (mode)
16591 || VALID_SSE2_REG_MODE (mode)
16592 || VALID_MMX_REG_MODE (mode)
16593 || VALID_MMX_REG_MODE_3DNOW (mode));
16595 if (MMX_REGNO_P (regno))
16597 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16598 so if the register is available at all, then we can move data of
16599 the given mode into or out of it. */
16600 return (VALID_MMX_REG_MODE (mode)
16601 || VALID_MMX_REG_MODE_3DNOW (mode));
16604 if (mode == QImode)
16606 /* Take care for QImode values - they can be in non-QI regs,
16607 but then they do cause partial register stalls. */
16608 if (regno < 4 || TARGET_64BIT)
16610 if (!TARGET_PARTIAL_REG_STALL)
16612 return reload_in_progress || reload_completed;
16614 /* We handle both integer and floats in the general purpose registers. */
16615 else if (VALID_INT_MODE_P (mode))
16617 else if (VALID_FP_MODE_P (mode))
16619 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16620 on to use that value in smaller contexts, this can easily force a
16621 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16622 supporting DImode, allow it. */
16623 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16629 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16630 tieable integer mode. */
16633 ix86_tieable_integer_mode_p (enum machine_mode mode)
16642 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16645 return TARGET_64BIT;
16652 /* Return true if MODE1 is accessible in a register that can hold MODE2
16653 without copying. That is, all register classes that can hold MODE2
16654 can also hold MODE1. */
16657 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16659 if (mode1 == mode2)
16662 if (ix86_tieable_integer_mode_p (mode1)
16663 && ix86_tieable_integer_mode_p (mode2))
16666 /* MODE2 being XFmode implies fp stack or general regs, which means we
16667 can tie any smaller floating point modes to it. Note that we do not
16668 tie this with TFmode. */
16669 if (mode2 == XFmode)
16670 return mode1 == SFmode || mode1 == DFmode;
16672 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16673 that we can tie it with SFmode. */
16674 if (mode2 == DFmode)
16675 return mode1 == SFmode;
16677 /* If MODE2 is only appropriate for an SSE register, then tie with
16678 any other mode acceptable to SSE registers. */
16679 if (GET_MODE_SIZE (mode2) >= 8
16680 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16681 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16683 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16684 with any other mode acceptable to MMX registers. */
16685 if (GET_MODE_SIZE (mode2) == 8
16686 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16687 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16692 /* Return the cost of moving data of mode M between a
16693 register and memory. A value of 2 is the default; this cost is
16694 relative to those in `REGISTER_MOVE_COST'.
16696 If moving between registers and memory is more expensive than
16697 between two registers, you should define this macro to express the
16700 Model also increased moving costs of QImode registers in non
16704 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
16706 if (FLOAT_CLASS_P (class))
16723 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16725 if (SSE_CLASS_P (class))
16728 switch (GET_MODE_SIZE (mode))
16742 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16744 if (MMX_CLASS_P (class))
16747 switch (GET_MODE_SIZE (mode))
16758 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
16760 switch (GET_MODE_SIZE (mode))
16764 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16765 : ix86_cost->movzbl_load);
16767 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16768 : ix86_cost->int_store[0] + 4);
16771 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16773 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
16774 if (mode == TFmode)
16776 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16777 * (((int) GET_MODE_SIZE (mode)
16778 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
16782 /* Compute a (partial) cost for rtx X. Return true if the complete
16783 cost has been computed, and false if subexpressions should be
16784 scanned. In either case, *TOTAL contains the cost result. */
16787 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16789 enum machine_mode mode = GET_MODE (x);
16797 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16799 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16801 else if (flag_pic && SYMBOLIC_CONST (x)
16803 || (!GET_CODE (x) != LABEL_REF
16804 && (GET_CODE (x) != SYMBOL_REF
16805 || !SYMBOL_REF_LOCAL_P (x)))))
16812 if (mode == VOIDmode)
16815 switch (standard_80387_constant_p (x))
16820 default: /* Other constants */
16825 /* Start with (MEM (SYMBOL_REF)), since that's where
16826 it'll probably end up. Add a penalty for size. */
16827 *total = (COSTS_N_INSNS (1)
16828 + (flag_pic != 0 && !TARGET_64BIT)
16829 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16835 /* The zero extensions is often completely free on x86_64, so make
16836 it as cheap as possible. */
16837 if (TARGET_64BIT && mode == DImode
16838 && GET_MODE (XEXP (x, 0)) == SImode)
16840 else if (TARGET_ZERO_EXTEND_WITH_AND)
16841 *total = ix86_cost->add;
16843 *total = ix86_cost->movzx;
16847 *total = ix86_cost->movsx;
16851 if (GET_CODE (XEXP (x, 1)) == CONST_INT
16852 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
16854 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16857 *total = ix86_cost->add;
16860 if ((value == 2 || value == 3)
16861 && ix86_cost->lea <= ix86_cost->shift_const)
16863 *total = ix86_cost->lea;
16873 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
16875 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16877 if (INTVAL (XEXP (x, 1)) > 32)
16878 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
16880 *total = ix86_cost->shift_const * 2;
16884 if (GET_CODE (XEXP (x, 1)) == AND)
16885 *total = ix86_cost->shift_var * 2;
16887 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
16892 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16893 *total = ix86_cost->shift_const;
16895 *total = ix86_cost->shift_var;
16900 if (FLOAT_MODE_P (mode))
16902 *total = ix86_cost->fmul;
16907 rtx op0 = XEXP (x, 0);
16908 rtx op1 = XEXP (x, 1);
16910 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16912 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16913 for (nbits = 0; value != 0; value &= value - 1)
16917 /* This is arbitrary. */
16920 /* Compute costs correctly for widening multiplication. */
16921 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
16922 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
16923 == GET_MODE_SIZE (mode))
16925 int is_mulwiden = 0;
16926 enum machine_mode inner_mode = GET_MODE (op0);
16928 if (GET_CODE (op0) == GET_CODE (op1))
16929 is_mulwiden = 1, op1 = XEXP (op1, 0);
16930 else if (GET_CODE (op1) == CONST_INT)
16932 if (GET_CODE (op0) == SIGN_EXTEND)
16933 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
16936 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
16940 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
16943 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
16944 + nbits * ix86_cost->mult_bit
16945 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
16954 if (FLOAT_MODE_P (mode))
16955 *total = ix86_cost->fdiv;
16957 *total = ix86_cost->divide[MODE_INDEX (mode)];
16961 if (FLOAT_MODE_P (mode))
16962 *total = ix86_cost->fadd;
16963 else if (GET_MODE_CLASS (mode) == MODE_INT
16964 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
16966 if (GET_CODE (XEXP (x, 0)) == PLUS
16967 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16968 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
16969 && CONSTANT_P (XEXP (x, 1)))
16971 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
16972 if (val == 2 || val == 4 || val == 8)
16974 *total = ix86_cost->lea;
16975 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16976 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
16978 *total += rtx_cost (XEXP (x, 1), outer_code);
16982 else if (GET_CODE (XEXP (x, 0)) == MULT
16983 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
16985 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
16986 if (val == 2 || val == 4 || val == 8)
16988 *total = ix86_cost->lea;
16989 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16990 *total += rtx_cost (XEXP (x, 1), outer_code);
16994 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16996 *total = ix86_cost->lea;
16997 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16998 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16999 *total += rtx_cost (XEXP (x, 1), outer_code);
17006 if (FLOAT_MODE_P (mode))
17008 *total = ix86_cost->fadd;
17016 if (!TARGET_64BIT && mode == DImode)
17018 *total = (ix86_cost->add * 2
17019 + (rtx_cost (XEXP (x, 0), outer_code)
17020 << (GET_MODE (XEXP (x, 0)) != DImode))
17021 + (rtx_cost (XEXP (x, 1), outer_code)
17022 << (GET_MODE (XEXP (x, 1)) != DImode)));
17028 if (FLOAT_MODE_P (mode))
17030 *total = ix86_cost->fchs;
17036 if (!TARGET_64BIT && mode == DImode)
17037 *total = ix86_cost->add * 2;
17039 *total = ix86_cost->add;
17043 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17044 && XEXP (XEXP (x, 0), 1) == const1_rtx
17045 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17046 && XEXP (x, 1) == const0_rtx)
17048 /* This kind of construct is implemented using test[bwl].
17049 Treat it as if we had an AND. */
17050 *total = (ix86_cost->add
17051 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17052 + rtx_cost (const1_rtx, outer_code));
17058 if (!TARGET_SSE_MATH
17060 || (mode == DFmode && !TARGET_SSE2))
17065 if (FLOAT_MODE_P (mode))
17066 *total = ix86_cost->fabs;
17070 if (FLOAT_MODE_P (mode))
17071 *total = ix86_cost->fsqrt;
17075 if (XINT (x, 1) == UNSPEC_TP)
17086 static int current_machopic_label_num;
17088 /* Given a symbol name and its associated stub, write out the
17089 definition of the stub. */
17092 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17094 unsigned int length;
17095 char *binder_name, *symbol_name, lazy_ptr_name[32];
17096 int label = ++current_machopic_label_num;
17098 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17099 symb = (*targetm.strip_name_encoding) (symb);
17101 length = strlen (stub);
17102 binder_name = alloca (length + 32);
17103 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17105 length = strlen (symb);
17106 symbol_name = alloca (length + 32);
17107 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17109 sprintf (lazy_ptr_name, "L%d$lz", label);
17112 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17114 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17116 fprintf (file, "%s:\n", stub);
17117 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17121 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
17122 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17123 fprintf (file, "\tjmp *%%edx\n");
17126 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
17128 fprintf (file, "%s:\n", binder_name);
17132 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17133 fprintf (file, "\tpushl %%eax\n");
17136 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
17138 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
17140 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17141 fprintf (file, "%s:\n", lazy_ptr_name);
17142 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17143 fprintf (file, "\t.long %s\n", binder_name);
17147 darwin_x86_file_end (void)
17149 darwin_file_end ();
17152 #endif /* TARGET_MACHO */
17154 /* Order the registers for register allocator. */
17157 x86_order_regs_for_local_alloc (void)
17162 /* First allocate the local general purpose registers. */
17163 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17164 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17165 reg_alloc_order [pos++] = i;
17167 /* Global general purpose registers. */
17168 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17169 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17170 reg_alloc_order [pos++] = i;
17172 /* x87 registers come first in case we are doing FP math
17174 if (!TARGET_SSE_MATH)
17175 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17176 reg_alloc_order [pos++] = i;
17178 /* SSE registers. */
17179 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17180 reg_alloc_order [pos++] = i;
17181 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17182 reg_alloc_order [pos++] = i;
17184 /* x87 registers. */
17185 if (TARGET_SSE_MATH)
17186 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17187 reg_alloc_order [pos++] = i;
17189 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17190 reg_alloc_order [pos++] = i;
17192 /* Initialize the rest of array as we do not allocate some registers
17194 while (pos < FIRST_PSEUDO_REGISTER)
17195 reg_alloc_order [pos++] = 0;
17198 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17199 struct attribute_spec.handler. */
17201 ix86_handle_struct_attribute (tree *node, tree name,
17202 tree args ATTRIBUTE_UNUSED,
17203 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17206 if (DECL_P (*node))
17208 if (TREE_CODE (*node) == TYPE_DECL)
17209 type = &TREE_TYPE (*node);
17214 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17215 || TREE_CODE (*type) == UNION_TYPE)))
17217 warning (OPT_Wattributes, "%qs attribute ignored",
17218 IDENTIFIER_POINTER (name));
17219 *no_add_attrs = true;
17222 else if ((is_attribute_p ("ms_struct", name)
17223 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17224 || ((is_attribute_p ("gcc_struct", name)
17225 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17227 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17228 IDENTIFIER_POINTER (name));
17229 *no_add_attrs = true;
17236 ix86_ms_bitfield_layout_p (tree record_type)
17238 return (TARGET_MS_BITFIELD_LAYOUT &&
17239 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17240 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17243 /* Returns an expression indicating where the this parameter is
17244 located on entry to the FUNCTION. */
17247 x86_this_parameter (tree function)
17249 tree type = TREE_TYPE (function);
17253 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17254 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17257 if (ix86_function_regparm (type, function) > 0)
17261 parm = TYPE_ARG_TYPES (type);
17262 /* Figure out whether or not the function has a variable number of
17264 for (; parm; parm = TREE_CHAIN (parm))
17265 if (TREE_VALUE (parm) == void_type_node)
17267 /* If not, the this parameter is in the first argument. */
17271 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17273 return gen_rtx_REG (SImode, regno);
17277 if (aggregate_value_p (TREE_TYPE (type), type))
17278 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17280 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17283 /* Determine whether x86_output_mi_thunk can succeed. */
17286 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17287 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17288 HOST_WIDE_INT vcall_offset, tree function)
17290 /* 64-bit can handle anything. */
17294 /* For 32-bit, everything's fine if we have one free register. */
17295 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17298 /* Need a free register for vcall_offset. */
17302 /* Need a free register for GOT references. */
17303 if (flag_pic && !(*targetm.binds_local_p) (function))
17306 /* Otherwise ok. */
17310 /* Output the assembler code for a thunk function. THUNK_DECL is the
17311 declaration for the thunk function itself, FUNCTION is the decl for
17312 the target function. DELTA is an immediate constant offset to be
17313 added to THIS. If VCALL_OFFSET is nonzero, the word at
17314 *(*this + vcall_offset) should be added to THIS. */
17317 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17318 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17319 HOST_WIDE_INT vcall_offset, tree function)
17322 rtx this = x86_this_parameter (function);
17325 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17326 pull it in now and let DELTA benefit. */
17329 else if (vcall_offset)
17331 /* Put the this parameter into %eax. */
17333 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17334 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17337 this_reg = NULL_RTX;
17339 /* Adjust the this parameter by a fixed constant. */
17342 xops[0] = GEN_INT (delta);
17343 xops[1] = this_reg ? this_reg : this;
17346 if (!x86_64_general_operand (xops[0], DImode))
17348 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17350 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17354 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17357 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17360 /* Adjust the this parameter by a value stored in the vtable. */
17364 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17367 int tmp_regno = 2 /* ECX */;
17368 if (lookup_attribute ("fastcall",
17369 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17370 tmp_regno = 0 /* EAX */;
17371 tmp = gen_rtx_REG (SImode, tmp_regno);
17374 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17377 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17379 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17381 /* Adjust the this parameter. */
17382 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17383 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17385 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17386 xops[0] = GEN_INT (vcall_offset);
17388 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17389 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17391 xops[1] = this_reg;
17393 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17395 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17398 /* If necessary, drop THIS back to its stack slot. */
17399 if (this_reg && this_reg != this)
17401 xops[0] = this_reg;
17403 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17406 xops[0] = XEXP (DECL_RTL (function), 0);
17409 if (!flag_pic || (*targetm.binds_local_p) (function))
17410 output_asm_insn ("jmp\t%P0", xops);
17413 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17414 tmp = gen_rtx_CONST (Pmode, tmp);
17415 tmp = gen_rtx_MEM (QImode, tmp);
17417 output_asm_insn ("jmp\t%A0", xops);
17422 if (!flag_pic || (*targetm.binds_local_p) (function))
17423 output_asm_insn ("jmp\t%P0", xops);
17428 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17429 tmp = (gen_rtx_SYMBOL_REF
17431 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17432 tmp = gen_rtx_MEM (QImode, tmp);
17434 output_asm_insn ("jmp\t%0", xops);
17437 #endif /* TARGET_MACHO */
17439 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17440 output_set_got (tmp, NULL_RTX);
17443 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17444 output_asm_insn ("jmp\t{*}%1", xops);
17450 x86_file_start (void)
17452 default_file_start ();
17453 if (X86_FILE_START_VERSION_DIRECTIVE)
17454 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17455 if (X86_FILE_START_FLTUSED)
17456 fputs ("\t.global\t__fltused\n", asm_out_file);
17457 if (ix86_asm_dialect == ASM_INTEL)
17458 fputs ("\t.intel_syntax\n", asm_out_file);
17462 x86_field_alignment (tree field, int computed)
17464 enum machine_mode mode;
17465 tree type = TREE_TYPE (field);
17467 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17469 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17470 ? get_inner_array_type (type) : type);
17471 if (mode == DFmode || mode == DCmode
17472 || GET_MODE_CLASS (mode) == MODE_INT
17473 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17474 return MIN (32, computed);
17478 /* Output assembler code to FILE to increment profiler label # LABELNO
17479 for profiling a function entry. */
17481 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17486 #ifndef NO_PROFILE_COUNTERS
17487 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17489 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17493 #ifndef NO_PROFILE_COUNTERS
17494 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17496 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17500 #ifndef NO_PROFILE_COUNTERS
17501 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17502 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17504 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17508 #ifndef NO_PROFILE_COUNTERS
17509 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17510 PROFILE_COUNT_REGISTER);
17512 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17516 /* We don't have exact information about the insn sizes, but we may assume
17517 quite safely that we are informed about all 1 byte insns and memory
17518 address sizes. This is enough to eliminate unnecessary padding in
17522 min_insn_size (rtx insn)
17526 if (!INSN_P (insn) || !active_insn_p (insn))
17529 /* Discard alignments we've emit and jump instructions. */
17530 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17531 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17533 if (GET_CODE (insn) == JUMP_INSN
17534 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17535 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17538 /* Important case - calls are always 5 bytes.
17539 It is common to have many calls in the row. */
17540 if (GET_CODE (insn) == CALL_INSN
17541 && symbolic_reference_mentioned_p (PATTERN (insn))
17542 && !SIBLING_CALL_P (insn))
17544 if (get_attr_length (insn) <= 1)
17547 /* For normal instructions we may rely on the sizes of addresses
17548 and the presence of symbol to require 4 bytes of encoding.
17549 This is not the case for jumps where references are PC relative. */
17550 if (GET_CODE (insn) != JUMP_INSN)
17552 l = get_attr_length_address (insn);
17553 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17562 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
17566 ix86_avoid_jump_misspredicts (void)
17568 rtx insn, start = get_insns ();
17569 int nbytes = 0, njumps = 0;
17572 /* Look for all minimal intervals of instructions containing 4 jumps.
17573 The intervals are bounded by START and INSN. NBYTES is the total
17574 size of instructions in the interval including INSN and not including
17575 START. When the NBYTES is smaller than 16 bytes, it is possible
17576 that the end of START and INSN ends up in the same 16byte page.
17578 The smallest offset in the page INSN can start is the case where START
17579 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
17580 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
17582 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17585 nbytes += min_insn_size (insn);
17587 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17588 INSN_UID (insn), min_insn_size (insn));
17589 if ((GET_CODE (insn) == JUMP_INSN
17590 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17591 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17592 || GET_CODE (insn) == CALL_INSN)
17599 start = NEXT_INSN (start);
17600 if ((GET_CODE (start) == JUMP_INSN
17601 && GET_CODE (PATTERN (start)) != ADDR_VEC
17602 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17603 || GET_CODE (start) == CALL_INSN)
17604 njumps--, isjump = 1;
17607 nbytes -= min_insn_size (start);
17609 gcc_assert (njumps >= 0);
17611 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17612 INSN_UID (start), INSN_UID (insn), nbytes);
17614 if (njumps == 3 && isjump && nbytes < 16)
17616 int padsize = 15 - nbytes + min_insn_size (insn);
17619 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17620 INSN_UID (insn), padsize);
17621 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17626 /* AMD Athlon works faster
17627 when RET is not destination of conditional jump or directly preceded
17628 by other jump instruction. We avoid the penalty by inserting NOP just
17629 before the RET instructions in such cases. */
17631 ix86_pad_returns (void)
17636 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17638 basic_block bb = e->src;
17639 rtx ret = BB_END (bb);
17641 bool replace = false;
17643 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17644 || !maybe_hot_bb_p (bb))
17646 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17647 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17649 if (prev && GET_CODE (prev) == CODE_LABEL)
17654 FOR_EACH_EDGE (e, ei, bb->preds)
17655 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17656 && !(e->flags & EDGE_FALLTHRU))
17661 prev = prev_active_insn (ret);
17663 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17664 || GET_CODE (prev) == CALL_INSN))
17666 /* Empty functions get branch mispredict even when the jump destination
17667 is not visible to us. */
17668 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17673 emit_insn_before (gen_return_internal_long (), ret);
17679 /* Implement machine specific optimizations. We implement padding of returns
17680 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
17684 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
17685 ix86_pad_returns ();
17686 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17687 ix86_avoid_jump_misspredicts ();
17690 /* Return nonzero when QImode register that must be represented via REX prefix
17693 x86_extended_QIreg_mentioned_p (rtx insn)
17696 extract_insn_cached (insn);
17697 for (i = 0; i < recog_data.n_operands; i++)
17698 if (REG_P (recog_data.operand[i])
17699 && REGNO (recog_data.operand[i]) >= 4)
17704 /* Return nonzero when P points to register encoded via REX prefix.
17705 Called via for_each_rtx. */
17707 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17709 unsigned int regno;
17712 regno = REGNO (*p);
17713 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17716 /* Return true when INSN mentions register that must be encoded using REX
17719 x86_extended_reg_mentioned_p (rtx insn)
17721 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17724 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17725 optabs would emit if we didn't have TFmode patterns. */
17728 x86_emit_floatuns (rtx operands[2])
17730 rtx neglab, donelab, i0, i1, f0, in, out;
17731 enum machine_mode mode, inmode;
17733 inmode = GET_MODE (operands[1]);
17734 gcc_assert (inmode == SImode || inmode == DImode);
17737 in = force_reg (inmode, operands[1]);
17738 mode = GET_MODE (out);
17739 neglab = gen_label_rtx ();
17740 donelab = gen_label_rtx ();
17741 i1 = gen_reg_rtx (Pmode);
17742 f0 = gen_reg_rtx (mode);
17744 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17746 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17747 emit_jump_insn (gen_jump (donelab));
17750 emit_label (neglab);
17752 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17753 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17754 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17755 expand_float (f0, i0, 0);
17756 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17758 emit_label (donelab);
17761 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17762 with all elements equal to VAR. Return true if successful. */
17765 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17766 rtx target, rtx val)
17768 enum machine_mode smode, wsmode, wvmode;
17775 if (!mmx_ok && !TARGET_SSE)
17783 val = force_reg (GET_MODE_INNER (mode), val);
17784 x = gen_rtx_VEC_DUPLICATE (mode, val);
17785 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17791 if (TARGET_SSE || TARGET_3DNOW_A)
17793 val = gen_lowpart (SImode, val);
17794 x = gen_rtx_TRUNCATE (HImode, val);
17795 x = gen_rtx_VEC_DUPLICATE (mode, x);
17796 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17825 /* Replicate the value once into the next wider mode and recurse. */
17826 val = convert_modes (wsmode, smode, val, true);
17827 x = expand_simple_binop (wsmode, ASHIFT, val,
17828 GEN_INT (GET_MODE_BITSIZE (smode)),
17829 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17830 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
17832 x = gen_reg_rtx (wvmode);
17833 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
17834 gcc_unreachable ();
17835 emit_move_insn (target, gen_lowpart (mode, x));
17843 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17844 whose low element is VAR, and other elements are zero. Return true
17848 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
17849 rtx target, rtx var)
17851 enum machine_mode vsimode;
17858 if (!mmx_ok && !TARGET_SSE)
17864 var = force_reg (GET_MODE_INNER (mode), var);
17865 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
17866 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17871 var = force_reg (GET_MODE_INNER (mode), var);
17872 x = gen_rtx_VEC_DUPLICATE (mode, var);
17873 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
17874 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17879 vsimode = V4SImode;
17885 vsimode = V2SImode;
17888 /* Zero extend the variable element to SImode and recurse. */
17889 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
17891 x = gen_reg_rtx (vsimode);
17892 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
17893 gcc_unreachable ();
17895 emit_move_insn (target, gen_lowpart (mode, x));
17903 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17904 consisting of the values in VALS. It is known that all elements
17905 except ONE_VAR are constants. Return true if successful. */
17908 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
17909 rtx target, rtx vals, int one_var)
17911 rtx var = XVECEXP (vals, 0, one_var);
17912 enum machine_mode wmode;
17915 const_vec = copy_rtx (vals);
17916 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
17917 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
17925 /* For the two element vectors, it's just as easy to use
17926 the general case. */
17942 /* There's no way to set one QImode entry easily. Combine
17943 the variable value with its adjacent constant value, and
17944 promote to an HImode set. */
17945 x = XVECEXP (vals, 0, one_var ^ 1);
17948 var = convert_modes (HImode, QImode, var, true);
17949 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
17950 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17951 x = GEN_INT (INTVAL (x) & 0xff);
17955 var = convert_modes (HImode, QImode, var, true);
17956 x = gen_int_mode (INTVAL (x) << 8, HImode);
17958 if (x != const0_rtx)
17959 var = expand_simple_binop (HImode, IOR, var, x, var,
17960 1, OPTAB_LIB_WIDEN);
17962 x = gen_reg_rtx (wmode);
17963 emit_move_insn (x, gen_lowpart (wmode, const_vec));
17964 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
17966 emit_move_insn (target, gen_lowpart (mode, x));
17973 emit_move_insn (target, const_vec);
17974 ix86_expand_vector_set (mmx_ok, target, var, one_var);
17978 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
17979 all values variable, and none identical. */
17982 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
17983 rtx target, rtx vals)
17985 enum machine_mode half_mode = GET_MODE_INNER (mode);
17986 rtx op0 = NULL, op1 = NULL;
17987 bool use_vec_concat = false;
17993 if (!mmx_ok && !TARGET_SSE)
17999 /* For the two element vectors, we always implement VEC_CONCAT. */
18000 op0 = XVECEXP (vals, 0, 0);
18001 op1 = XVECEXP (vals, 0, 1);
18002 use_vec_concat = true;
18006 half_mode = V2SFmode;
18009 half_mode = V2SImode;
18015 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18016 Recurse to load the two halves. */
18018 op0 = gen_reg_rtx (half_mode);
18019 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18020 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18022 op1 = gen_reg_rtx (half_mode);
18023 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18024 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18026 use_vec_concat = true;
18037 gcc_unreachable ();
18040 if (use_vec_concat)
18042 if (!register_operand (op0, half_mode))
18043 op0 = force_reg (half_mode, op0);
18044 if (!register_operand (op1, half_mode))
18045 op1 = force_reg (half_mode, op1);
18047 emit_insn (gen_rtx_SET (VOIDmode, target,
18048 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18052 int i, j, n_elts, n_words, n_elt_per_word;
18053 enum machine_mode inner_mode;
18054 rtx words[4], shift;
18056 inner_mode = GET_MODE_INNER (mode);
18057 n_elts = GET_MODE_NUNITS (mode);
18058 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18059 n_elt_per_word = n_elts / n_words;
18060 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18062 for (i = 0; i < n_words; ++i)
18064 rtx word = NULL_RTX;
18066 for (j = 0; j < n_elt_per_word; ++j)
18068 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18069 elt = convert_modes (word_mode, inner_mode, elt, true);
18075 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18076 word, 1, OPTAB_LIB_WIDEN);
18077 word = expand_simple_binop (word_mode, IOR, word, elt,
18078 word, 1, OPTAB_LIB_WIDEN);
18086 emit_move_insn (target, gen_lowpart (mode, words[0]));
18087 else if (n_words == 2)
18089 rtx tmp = gen_reg_rtx (mode);
18090 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18091 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18092 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18093 emit_move_insn (target, tmp);
18095 else if (n_words == 4)
18097 rtx tmp = gen_reg_rtx (V4SImode);
18098 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18099 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18100 emit_move_insn (target, gen_lowpart (mode, tmp));
18103 gcc_unreachable ();
18107 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18108 instructions unless MMX_OK is true. */
18111 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18113 enum machine_mode mode = GET_MODE (target);
18114 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18115 int n_elts = GET_MODE_NUNITS (mode);
18116 int n_var = 0, one_var = -1;
18117 bool all_same = true, all_const_zero = true;
18121 for (i = 0; i < n_elts; ++i)
18123 x = XVECEXP (vals, 0, i);
18124 if (!CONSTANT_P (x))
18125 n_var++, one_var = i;
18126 else if (x != CONST0_RTX (inner_mode))
18127 all_const_zero = false;
18128 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18132 /* Constants are best loaded from the constant pool. */
18135 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18139 /* If all values are identical, broadcast the value. */
18141 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18142 XVECEXP (vals, 0, 0)))
18145 /* Values where only one field is non-constant are best loaded from
18146 the pool and overwritten via move later. */
18149 if (all_const_zero && one_var == 0
18150 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
18151 XVECEXP (vals, 0, 0)))
18154 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18158 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18162 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18164 enum machine_mode mode = GET_MODE (target);
18165 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18166 bool use_vec_merge = false;
18175 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18176 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18178 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18180 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18181 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18191 /* For the two element vectors, we implement a VEC_CONCAT with
18192 the extraction of the other element. */
18194 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18195 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18198 op0 = val, op1 = tmp;
18200 op0 = tmp, op1 = val;
18202 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18203 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18211 use_vec_merge = true;
18215 /* tmp = target = A B C D */
18216 tmp = copy_to_reg (target);
18217 /* target = A A B B */
18218 emit_insn (gen_sse_unpcklps (target, target, target));
18219 /* target = X A B B */
18220 ix86_expand_vector_set (false, target, val, 0);
18221 /* target = A X C D */
18222 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18223 GEN_INT (1), GEN_INT (0),
18224 GEN_INT (2+4), GEN_INT (3+4)));
18228 /* tmp = target = A B C D */
18229 tmp = copy_to_reg (target);
18230 /* tmp = X B C D */
18231 ix86_expand_vector_set (false, tmp, val, 0);
18232 /* target = A B X D */
18233 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18234 GEN_INT (0), GEN_INT (1),
18235 GEN_INT (0+4), GEN_INT (3+4)));
18239 /* tmp = target = A B C D */
18240 tmp = copy_to_reg (target);
18241 /* tmp = X B C D */
18242 ix86_expand_vector_set (false, tmp, val, 0);
18243 /* target = A B X D */
18244 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18245 GEN_INT (0), GEN_INT (1),
18246 GEN_INT (2+4), GEN_INT (0+4)));
18250 gcc_unreachable ();
18255 /* Element 0 handled by vec_merge below. */
18258 use_vec_merge = true;
18264 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18265 store into element 0, then shuffle them back. */
18269 order[0] = GEN_INT (elt);
18270 order[1] = const1_rtx;
18271 order[2] = const2_rtx;
18272 order[3] = GEN_INT (3);
18273 order[elt] = const0_rtx;
18275 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18276 order[1], order[2], order[3]));
18278 ix86_expand_vector_set (false, target, val, 0);
18280 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18281 order[1], order[2], order[3]));
18285 /* For SSE1, we have to reuse the V4SF code. */
18286 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18287 gen_lowpart (SFmode, val), elt);
18292 use_vec_merge = TARGET_SSE2;
18295 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18306 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18307 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18308 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18312 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18314 emit_move_insn (mem, target);
18316 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18317 emit_move_insn (tmp, val);
18319 emit_move_insn (target, mem);
18324 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18326 enum machine_mode mode = GET_MODE (vec);
18327 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18328 bool use_vec_extr = false;
18341 use_vec_extr = true;
18353 tmp = gen_reg_rtx (mode);
18354 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18355 GEN_INT (elt), GEN_INT (elt),
18356 GEN_INT (elt+4), GEN_INT (elt+4)));
18360 tmp = gen_reg_rtx (mode);
18361 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18365 gcc_unreachable ();
18368 use_vec_extr = true;
18383 tmp = gen_reg_rtx (mode);
18384 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18385 GEN_INT (elt), GEN_INT (elt),
18386 GEN_INT (elt), GEN_INT (elt)));
18390 tmp = gen_reg_rtx (mode);
18391 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18395 gcc_unreachable ();
18398 use_vec_extr = true;
18403 /* For SSE1, we have to reuse the V4SF code. */
18404 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18405 gen_lowpart (V4SFmode, vec), elt);
18411 use_vec_extr = TARGET_SSE2;
18414 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18419 /* ??? Could extract the appropriate HImode element and shift. */
18426 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18427 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18429 /* Let the rtl optimizers know about the zero extension performed. */
18430 if (inner_mode == HImode)
18432 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18433 target = gen_lowpart (SImode, target);
18436 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18440 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18442 emit_move_insn (mem, vec);
18444 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18445 emit_move_insn (target, tmp);
18449 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18450 pattern to reduce; DEST is the destination; IN is the input vector. */
18453 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18455 rtx tmp1, tmp2, tmp3;
18457 tmp1 = gen_reg_rtx (V4SFmode);
18458 tmp2 = gen_reg_rtx (V4SFmode);
18459 tmp3 = gen_reg_rtx (V4SFmode);
18461 emit_insn (gen_sse_movhlps (tmp1, in, in));
18462 emit_insn (fn (tmp2, tmp1, in));
18464 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18465 GEN_INT (1), GEN_INT (1),
18466 GEN_INT (1+4), GEN_INT (1+4)));
18467 emit_insn (fn (dest, tmp2, tmp3));
18470 /* Target hook for scalar_mode_supported_p. */
18472 ix86_scalar_mode_supported_p (enum machine_mode mode)
18474 if (DECIMAL_FLOAT_MODE_P (mode))
18477 return default_scalar_mode_supported_p (mode);
18480 /* Implements target hook vector_mode_supported_p. */
18482 ix86_vector_mode_supported_p (enum machine_mode mode)
18484 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18486 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18488 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18490 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18495 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18497 We do this in the new i386 backend to maintain source compatibility
18498 with the old cc0-based compiler. */
18501 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18502 tree inputs ATTRIBUTE_UNUSED,
18505 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18507 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18509 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18514 /* Return true if this goes in small data/bss. */
18517 ix86_in_large_data_p (tree exp)
18519 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18522 /* Functions are never large data. */
18523 if (TREE_CODE (exp) == FUNCTION_DECL)
18526 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18528 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18529 if (strcmp (section, ".ldata") == 0
18530 || strcmp (section, ".lbss") == 0)
18536 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18538 /* If this is an incomplete type with size 0, then we can't put it
18539 in data because it might be too big when completed. */
18540 if (!size || size > ix86_section_threshold)
18547 ix86_encode_section_info (tree decl, rtx rtl, int first)
18549 default_encode_section_info (decl, rtl, first);
18551 if (TREE_CODE (decl) == VAR_DECL
18552 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18553 && ix86_in_large_data_p (decl))
18554 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
18557 /* Worker function for REVERSE_CONDITION. */
18560 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
18562 return (mode != CCFPmode && mode != CCFPUmode
18563 ? reverse_condition (code)
18564 : reverse_condition_maybe_unordered (code));
18567 /* Output code to perform an x87 FP register move, from OPERANDS[1]
18571 output_387_reg_move (rtx insn, rtx *operands)
18573 if (REG_P (operands[1])
18574 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18576 if (REGNO (operands[0]) == FIRST_STACK_REG
18577 && TARGET_USE_FFREEP)
18578 return "ffreep\t%y0";
18579 return "fstp\t%y0";
18581 if (STACK_TOP_P (operands[0]))
18582 return "fld%z1\t%y1";
18586 /* Output code to perform a conditional jump to LABEL, if C2 flag in
18587 FP status register is set. */
18590 ix86_emit_fp_unordered_jump (rtx label)
18592 rtx reg = gen_reg_rtx (HImode);
18595 emit_insn (gen_x86_fnstsw_1 (reg));
18597 if (TARGET_USE_SAHF)
18599 emit_insn (gen_x86_sahf_1 (reg));
18601 temp = gen_rtx_REG (CCmode, FLAGS_REG);
18602 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
18606 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
18608 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18609 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
18612 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
18613 gen_rtx_LABEL_REF (VOIDmode, label),
18615 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
18616 emit_jump_insn (temp);
18619 /* Output code to perform a log1p XFmode calculation. */
18621 void ix86_emit_i387_log1p (rtx op0, rtx op1)
18623 rtx label1 = gen_label_rtx ();
18624 rtx label2 = gen_label_rtx ();
18626 rtx tmp = gen_reg_rtx (XFmode);
18627 rtx tmp2 = gen_reg_rtx (XFmode);
18629 emit_insn (gen_absxf2 (tmp, op1));
18630 emit_insn (gen_cmpxf (tmp,
18631 CONST_DOUBLE_FROM_REAL_VALUE (
18632 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
18634 emit_jump_insn (gen_bge (label1));
18636 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18637 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
18638 emit_jump (label2);
18640 emit_label (label1);
18641 emit_move_insn (tmp, CONST1_RTX (XFmode));
18642 emit_insn (gen_addxf3 (tmp, op1, tmp));
18643 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18644 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
18646 emit_label (label2);
18649 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
18652 i386_solaris_elf_named_section (const char *name, unsigned int flags,
18655 /* With Binutils 2.15, the "@unwind" marker must be specified on
18656 every occurrence of the ".eh_frame" section, not just the first
18659 && strcmp (name, ".eh_frame") == 0)
18661 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18662 flags & SECTION_WRITE ? "aw" : "a");
18665 default_elf_asm_named_section (name, flags, decl);
18668 /* Return the mangling of TYPE if it is an extended fundamental type. */
18670 static const char *
18671 ix86_mangle_fundamental_type (tree type)
18673 switch (TYPE_MODE (type))
18676 /* __float128 is "g". */
18679 /* "long double" or __float80 is "e". */
18686 /* For 32-bit code we can save PIC register setup by using
18687 __stack_chk_fail_local hidden function instead of calling
18688 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
18689 register, so it is better to call __stack_chk_fail directly. */
18692 ix86_stack_protect_fail (void)
18694 return TARGET_64BIT
18695 ? default_external_stack_protect_fail ()
18696 : default_hidden_stack_protect_fail ();
18699 /* Select a format to encode pointers in exception handling data. CODE
18700 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
18701 true if the symbol may be affected by dynamic relocations.
18703 ??? All x86 object file formats are capable of representing this.
18704 After all, the relocation needed is the same as for the call insn.
18705 Whether or not a particular assembler allows us to enter such, I
18706 guess we'll have to see. */
18708 asm_preferred_eh_data_format (int code, int global)
18712 int type = DW_EH_PE_sdata8;
18714 || ix86_cmodel == CM_SMALL_PIC
18715 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
18716 type = DW_EH_PE_sdata4;
18717 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
18719 if (ix86_cmodel == CM_SMALL
18720 || (ix86_cmodel == CM_MEDIUM && code))
18721 return DW_EH_PE_udata4;
18722 return DW_EH_PE_absptr;
18725 #include "gt-i386.h"