/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
#include "dumpfile.h"
#include "tree-pass.h"
#include "tree-flow.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
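/* For orientation only (the override values quoted here are an assumption
   drawn from the Windows targets, not defined in this file):
   i386/cygming.h sets CHECK_STACK_LIMIT to 4000, just under one 4kB page,
   so that larger stack allocations receive an explicit probe; with the -1
   fallback above, the probe threshold is effectively unlimited.  */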
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)                        \
  ((mode) == QImode ? 0                         \
   : (mode) == HImode ? 1                       \
   : (mode) == SImode ? 2                       \
   : (mode) == DImode ? 3                       \
   : 4)
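/* Usage sketch (illustrative; `ix86_cost' points at whichever cost table
   below is active for the selected -mtune):

     ix86_cost->mult_init[MODE_INDEX (SImode)]   -- index 2
     ix86_cost->divide[MODE_INDEX (DImode)]      -- index 3

   Any other mode falls through to index 4, the "other" entry of the
   five-element multiply and divide rows.  */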
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
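/* Worked example: under the assumption above, COSTS_N_INSNS (N) == (N) * 4,
   so COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1); a 2-byte addition is
   priced the same as one generic instruction, keeping the size-tuned table
   below on a scale comparable to the speed-tuned tables that follow.  */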
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
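/* Shape of a stringop_algs initializer (type declared in i386.h): the
   leading element picks the algorithm used when the block size is unknown
   at compile time; the {max, alg, noalign} triples that follow are tried
   in order for known sizes, with max == -1 terminating the list.  Each
   cost table below carries one such pair of tables for memcpy and one for
   memset, the first member of each pair tuning 32-bit code and the second
   64-bit code; DUMMY_STRINGOP_ALGS simply fills the 64-bit slot on
   processors that never execute 64-bit code.  */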
static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),            /* cost of an add instruction */
  COSTS_N_BYTES (3),            /* cost of a lea instruction */
  COSTS_N_BYTES (2),            /* variable shift costs */
  COSTS_N_BYTES (3),            /* constant shift costs */
  {COSTS_N_BYTES (3),           /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),           /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  COSTS_N_BYTES (3),            /* cost of movsx */
  COSTS_N_BYTES (3),            /* cost of movzx */
  0,                            /* "large" insn */
  2,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {2, 2, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 2},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {2, 2, 2},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  3,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {3, 3},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  3,                            /* cost of moving SSE register */
  {3, 3, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {3, 3, 3},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_BYTES (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),            /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),            /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),            /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  1,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  1,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
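/* Note on the table layout: the trailing eleven entries of each table,
   scalar_stmt_cost through cond_not_taken_branch_cost, are not consumed by
   RTL expansion; they feed the tree vectorizer's cost model (see
   ix86_builtin_vectorization_cost later in this file).  */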
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (6),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),           /* HI */
   COSTS_N_INSNS (6),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  COSTS_N_INSNS (1),            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (23),          /* SI */
   COSTS_N_INSNS (23),          /* DI */
   COSTS_N_INSNS (23)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (23),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),          /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (12),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),          /* HI */
   COSTS_N_INSNS (12),          /* SI */
   COSTS_N_INSNS (12),          /* DI */
   COSTS_N_INSNS (12)},         /* other */
  1,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),          /* HI */
   COSTS_N_INSNS (40),          /* SI */
   COSTS_N_INSNS (40),          /* DI */
   COSTS_N_INSNS (40)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  4,                            /* size of l1 cache.  486 has 8kB cache
                                   shared for code and data, so 4kB is
                                   not really precise.  */
  4,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),           /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (11),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),          /* HI */
   COSTS_N_INSNS (11),          /* SI */
   COSTS_N_INSNS (11),          /* DI */
   COSTS_N_INSNS (11)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),          /* HI */
   COSTS_N_INSNS (25),          /* SI */
   COSTS_N_INSNS (25),          /* DI */
   COSTS_N_INSNS (25)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  6,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  8,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  8,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (4)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),          /* HI */
   COSTS_N_INSNS (17),          /* SI */
   COSTS_N_INSNS (17),          /* DI */
   COSTS_N_INSNS (17)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache */
  32,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                        {8192, rep_prefix_4_byte, false},
                        {-1, rep_prefix_1_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop, false},
                        {8192, rep_prefix_4_byte, false},
                        {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (2),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (7),           /* SI */
   COSTS_N_INSNS (7),           /* DI */
   COSTS_N_INSNS (7)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (39),          /* SI */
   COSTS_N_INSNS (39),          /* DI */
   COSTS_N_INSNS (39)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* MOVE_RATIO */
  1,                            /* cost for loading QImode using movzbl */
  {1, 1, 1},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {1, 1, 1},                    /* cost of storing integer registers */
  1,                            /* cost of reg,reg fld/fst */
  {1, 1, 1},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 6, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */

  1,                            /* cost of moving MMX register */
  {1, 1},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {1, 1},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  1,                            /* cost of moving SSE register */
  {1, 1, 1},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {1, 1, 1},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  1,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  128,                          /* size of l2 cache.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),          /* HI */
   COSTS_N_INSNS (18),          /* SI */
   COSTS_N_INSNS (18),          /* DI */
   COSTS_N_INSNS (18)},         /* other */
  COSTS_N_INSNS (2),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* MOVE_RATIO */
  3,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  6,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  32,                           /* size of l2 cache.  Some models
                                   have integrated l2 cache, but
                                   optimizing for k6 is not important
                                   enough to worry about that.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (5),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),           /* HI */
   COSTS_N_INSNS (5),           /* SI */
   COSTS_N_INSNS (5),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  5,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) compared to K8.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 3, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instructions for medium-sized blocks, but for very
     small blocks it is better to use a loop.  For large blocks, libcall can
     do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false},
              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  5,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  3,                            /* vec_unalign_load_cost.  */
  3,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  2,                            /* cond_not_taken_branch_cost.  */
};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                               1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                               1/1  1/1 */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instructions for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                               1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                               1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instructions for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                               1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                               1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instructions for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
struct processor_costs bdver3_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER3 has optimized REP instructions for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                               1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                               1/1  1/1 */
  32,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instructions for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
struct processor_costs btver2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                               1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                               1/1  1/1 */
  32,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (3),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (4),            /* constant shift costs */
  {COSTS_N_INSNS (15),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),          /* HI */
   COSTS_N_INSNS (15),          /* SI */
   COSTS_N_INSNS (15),          /* DI */
   COSTS_N_INSNS (15)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),          /* HI */
   COSTS_N_INSNS (56),          /* SI */
   COSTS_N_INSNS (56),          /* DI */
   COSTS_N_INSNS (56)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  12,                           /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  10,                           /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (5),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte, false}, {48, loop, false},
              {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (10),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),          /* HI */
   COSTS_N_INSNS (10),          /* SI */
   COSTS_N_INSNS (10),          /* DI */
   COSTS_N_INSNS (10)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),          /* HI */
   COSTS_N_INSNS (66),          /* SI */
   COSTS_N_INSNS (66),          /* DI */
   COSTS_N_INSNS (66)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  3,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  6,                            /* cost of moving MMX register */
  {12, 12},                     /* cost of loading MMX registers
                                   in SImode and DImode */
  {12, 12},                     /* cost of storing MMX registers
                                   in SImode and DImode */
  6,                            /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {12, 12, 12},                 /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  8,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  1024,                         /* size of l2 cache.  */
  128,                          /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
   {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
              {100000, unrolled_loop, false}, {-1, libcall, false}}}},
  {{libcall, {{6, loop_1_byte, false}, {48, loop, false},
              {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{24, loop, false}, {64, unrolled_loop, false},
              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (2)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
   {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {15, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{24, loop, false}, {32, unrolled_loop, false},
              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
1484 struct processor_costs slm_cost = {
1485 COSTS_N_INSNS (1), /* cost of an add instruction */
1486 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1487 COSTS_N_INSNS (1), /* variable shift costs */
1488 COSTS_N_INSNS (1), /* constant shift costs */
1489 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1490 COSTS_N_INSNS (4), /* HI */
1491 COSTS_N_INSNS (3), /* SI */
1492 COSTS_N_INSNS (4), /* DI */
1493 COSTS_N_INSNS (2)}, /* other */
1494 0, /* cost of multiply per each bit set */
1495 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1496 COSTS_N_INSNS (26), /* HI */
1497 COSTS_N_INSNS (42), /* SI */
1498 COSTS_N_INSNS (74), /* DI */
1499 COSTS_N_INSNS (74)}, /* other */
1500 COSTS_N_INSNS (1), /* cost of movsx */
1501 COSTS_N_INSNS (1), /* cost of movzx */
1502 8, /* "large" insn */
1503 17, /* MOVE_RATIO */
1504 4, /* cost for loading QImode using movzbl */
1505 {4, 4, 4}, /* cost of loading integer registers
1506 in QImode, HImode and SImode.
1507 Relative to reg-reg move (2). */
1508 {4, 4, 4}, /* cost of storing integer registers */
1509 4, /* cost of reg,reg fld/fst */
1510 {12, 12, 12}, /* cost of loading fp registers
1511 in SFmode, DFmode and XFmode */
1512 {6, 6, 8}, /* cost of storing fp registers
1513 in SFmode, DFmode and XFmode */
1514 2, /* cost of moving MMX register */
1515 {8, 8}, /* cost of loading MMX registers
1516 in SImode and DImode */
1517 {8, 8}, /* cost of storing MMX registers
1518 in SImode and DImode */
1519 2, /* cost of moving SSE register */
1520 {8, 8, 8}, /* cost of loading SSE registers
1521 in SImode, DImode and TImode */
1522 {8, 8, 8}, /* cost of storing SSE registers
1523 in SImode, DImode and TImode */
1524 5, /* MMX or SSE register to integer */
1525 32, /* size of l1 cache. */
1526 256, /* size of l2 cache. */
1527 64, /* size of prefetch block */
1528 6, /* number of parallel prefetches */
1529 3, /* Branch cost */
1530 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1531 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1532 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1533 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1534 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1535 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1536 {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1537 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1538 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
1539 {{libcall, {{8, loop, false}, {15, unrolled_loop, false},
1540 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1541 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1542 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
1543 1, /* scalar_stmt_cost. */
1544 1, /* scalar load_cost. */
1545 1, /* scalar_store_cost. */
1546 1, /* vec_stmt_cost. */
1547 1, /* vec_to_scalar_cost. */
1548 1, /* scalar_to_vec_cost. */
1549 1, /* vec_align_load_cost. */
1550 2, /* vec_unalign_load_cost. */
1551 1, /* vec_store_cost. */
1552 3, /* cond_taken_branch_cost. */
1553 1, /* cond_not_taken_branch_cost. */
1556 /* Generic64 should produce code tuned for Nocona and K8. */
1558 struct processor_costs generic64_cost = {
1559 COSTS_N_INSNS (1), /* cost of an add instruction */
1560 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1561 this cost, however, our current implementation of synth_mult results in
1562 the use of unnecessary temporary registers, causing regressions on several
1563 SPECfp benchmarks. */
1564 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1565 COSTS_N_INSNS (1), /* variable shift costs */
1566 COSTS_N_INSNS (1), /* constant shift costs */
1567 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1568 COSTS_N_INSNS (4), /* HI */
1569 COSTS_N_INSNS (3), /* SI */
1570 COSTS_N_INSNS (4), /* DI */
1571 COSTS_N_INSNS (2)}, /* other */
1572 0, /* cost of multiply per each bit set */
1573 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1574 COSTS_N_INSNS (26), /* HI */
1575 COSTS_N_INSNS (42), /* SI */
1576 COSTS_N_INSNS (74), /* DI */
1577 COSTS_N_INSNS (74)}, /* other */
1578 COSTS_N_INSNS (1), /* cost of movsx */
1579 COSTS_N_INSNS (1), /* cost of movzx */
1580 8, /* "large" insn */
1581 17, /* MOVE_RATIO */
1582 4, /* cost for loading QImode using movzbl */
1583 {4, 4, 4}, /* cost of loading integer registers
1584 in QImode, HImode and SImode.
1585 Relative to reg-reg move (2). */
1586 {4, 4, 4}, /* cost of storing integer registers */
1587 4, /* cost of reg,reg fld/fst */
1588 {12, 12, 12}, /* cost of loading fp registers
1589 in SFmode, DFmode and XFmode */
1590 {6, 6, 8}, /* cost of storing fp registers
1591 in SFmode, DFmode and XFmode */
1592 2, /* cost of moving MMX register */
1593 {8, 8}, /* cost of loading MMX registers
1594 in SImode and DImode */
1595 {8, 8}, /* cost of storing MMX registers
1596 in SImode and DImode */
1597 2, /* cost of moving SSE register */
1598 {8, 8, 8}, /* cost of loading SSE registers
1599 in SImode, DImode and TImode */
1600 {8, 8, 8}, /* cost of storing SSE registers
1601 in SImode, DImode and TImode */
1602 5, /* MMX or SSE register to integer */
1603 32, /* size of l1 cache. */
1604 512, /* size of l2 cache. */
1605 64, /* size of prefetch block */
1606 6, /* number of parallel prefetches */
1607 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1608 value is increased to the perhaps more appropriate value of 5. */
1609 3, /* Branch cost */
1610 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1611 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1612 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1613 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1614 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1615 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1616 {DUMMY_STRINGOP_ALGS,
1617 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1618 {-1, libcall, false}}}},
1619 {DUMMY_STRINGOP_ALGS,
1620 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1621 {-1, libcall, false}}}},
1622 1, /* scalar_stmt_cost. */
1623 1, /* scalar load_cost. */
1624 1, /* scalar_store_cost. */
1625 1, /* vec_stmt_cost. */
1626 1, /* vec_to_scalar_cost. */
1627 1, /* scalar_to_vec_cost. */
1628 1, /* vec_align_load_cost. */
1629 2, /* vec_unalign_load_cost. */
1630 1, /* vec_store_cost. */
1631 3, /* cond_taken_branch_cost. */
1632 1, /* cond_not_taken_branch_cost. */
1635 /* core_cost should produce code tuned for the Core family of CPUs. */
1637 struct processor_costs core_cost = {
1638 COSTS_N_INSNS (1), /* cost of an add instruction */
1639 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1640 this cost, however, our current implementation of synth_mult results in
1641 the use of unnecessary temporary registers, causing regressions on several
1642 SPECfp benchmarks. */
1643 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1644 COSTS_N_INSNS (1), /* variable shift costs */
1645 COSTS_N_INSNS (1), /* constant shift costs */
1646 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1647 COSTS_N_INSNS (4), /* HI */
1648 COSTS_N_INSNS (3), /* SI */
1649 COSTS_N_INSNS (4), /* DI */
1650 COSTS_N_INSNS (2)}, /* other */
1651 0, /* cost of multiply per each bit set */
1652 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1653 COSTS_N_INSNS (26), /* HI */
1654 COSTS_N_INSNS (42), /* SI */
1655 COSTS_N_INSNS (74), /* DI */
1656 COSTS_N_INSNS (74)}, /* other */
1657 COSTS_N_INSNS (1), /* cost of movsx */
1658 COSTS_N_INSNS (1), /* cost of movzx */
1659 8, /* "large" insn */
1660 17, /* MOVE_RATIO */
1661 4, /* cost for loading QImode using movzbl */
1662 {4, 4, 4}, /* cost of loading integer registers
1663 in QImode, HImode and SImode.
1664 Relative to reg-reg move (2). */
1665 {4, 4, 4}, /* cost of storing integer registers */
1666 4, /* cost of reg,reg fld/fst */
1667 {12, 12, 12}, /* cost of loading fp registers
1668 in SFmode, DFmode and XFmode */
1669 {6, 6, 8}, /* cost of storing fp registers
1670 in SFmode, DFmode and XFmode */
1671 2, /* cost of moving MMX register */
1672 {8, 8}, /* cost of loading MMX registers
1673 in SImode and DImode */
1674 {8, 8}, /* cost of storing MMX registers
1675 in SImode and DImode */
1676 2, /* cost of moving SSE register */
1677 {8, 8, 8}, /* cost of loading SSE registers
1678 in SImode, DImode and TImode */
1679 {8, 8, 8}, /* cost of storing SSE registers
1680 in SImode, DImode and TImode */
1681 5, /* MMX or SSE register to integer */
1682 64, /* size of l1 cache. */
1683 512, /* size of l2 cache. */
1684 64, /* size of prefetch block */
1685 6, /* number of parallel prefetches */
1686 /* FIXME: perhaps a more appropriate value is 5. */
1687 3, /* Branch cost */
1688 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1689 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1690 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1691 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1692 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1693 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1694 {{libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1695 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1696 {-1, libcall, false}}}},
1697 {{libcall, {{6, loop_1_byte, true},
1699 {8192, rep_prefix_4_byte, true},
1700 {-1, libcall, false}}},
1701 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1702 {-1, libcall, false}}}},
1703 1, /* scalar_stmt_cost. */
1704 1, /* scalar load_cost. */
1705 1, /* scalar_store_cost. */
1706 1, /* vec_stmt_cost. */
1707 1, /* vec_to_scalar_cost. */
1708 1, /* scalar_to_vec_cost. */
1709 1, /* vec_align_load_cost. */
1710 2, /* vec_unalign_load_cost. */
1711 1, /* vec_store_cost. */
1712 3, /* cond_taken_branch_cost. */
1713 1, /* cond_not_taken_branch_cost. */
1716 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona, Athlon and K8. */
1719 struct processor_costs generic32_cost = {
1720 COSTS_N_INSNS (1), /* cost of an add instruction */
1721 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1722 COSTS_N_INSNS (1), /* variable shift costs */
1723 COSTS_N_INSNS (1), /* constant shift costs */
1724 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1725 COSTS_N_INSNS (4), /* HI */
1726 COSTS_N_INSNS (3), /* SI */
1727 COSTS_N_INSNS (4), /* DI */
1728 COSTS_N_INSNS (2)}, /* other */
1729 0, /* cost of multiply per each bit set */
1730 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1731 COSTS_N_INSNS (26), /* HI */
1732 COSTS_N_INSNS (42), /* SI */
1733 COSTS_N_INSNS (74), /* DI */
1734 COSTS_N_INSNS (74)}, /* other */
1735 COSTS_N_INSNS (1), /* cost of movsx */
1736 COSTS_N_INSNS (1), /* cost of movzx */
1737 8, /* "large" insn */
1738 17, /* MOVE_RATIO */
1739 4, /* cost for loading QImode using movzbl */
1740 {4, 4, 4}, /* cost of loading integer registers
1741 in QImode, HImode and SImode.
1742 Relative to reg-reg move (2). */
1743 {4, 4, 4}, /* cost of storing integer registers */
1744 4, /* cost of reg,reg fld/fst */
1745 {12, 12, 12}, /* cost of loading fp registers
1746 in SFmode, DFmode and XFmode */
1747 {6, 6, 8}, /* cost of storing fp registers
1748 in SFmode, DFmode and XFmode */
1749 2, /* cost of moving MMX register */
1750 {8, 8}, /* cost of loading MMX registers
1751 in SImode and DImode */
1752 {8, 8}, /* cost of storing MMX registers
1753 in SImode and DImode */
1754 2, /* cost of moving SSE register */
1755 {8, 8, 8}, /* cost of loading SSE registers
1756 in SImode, DImode and TImode */
1757 {8, 8, 8}, /* cost of storing SSE registers
1758 in SImode, DImode and TImode */
1759 5, /* MMX or SSE register to integer */
1760 32, /* size of l1 cache. */
1761 256, /* size of l2 cache. */
1762 64, /* size of prefetch block */
1763 6, /* number of parallel prefetches */
1764 3, /* Branch cost */
1765 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1766 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1767 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1768 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1769 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1770 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1771 {{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1772 {-1, libcall, false}}},
1773 DUMMY_STRINGOP_ALGS},
1774 {{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1775 {-1, libcall, false}}},
1776 DUMMY_STRINGOP_ALGS},
1777 1, /* scalar_stmt_cost. */
1778 1, /* scalar load_cost. */
1779 1, /* scalar_store_cost. */
1780 1, /* vec_stmt_cost. */
1781 1, /* vec_to_scalar_cost. */
1782 1, /* scalar_to_vec_cost. */
1783 1, /* vec_align_load_cost. */
1784 2, /* vec_unalign_load_cost. */
1785 1, /* vec_store_cost. */
1786 3, /* cond_taken_branch_cost. */
1787 1, /* cond_not_taken_branch_cost. */
1790 /* Set by -mtune. */
1791 const struct processor_costs *ix86_tune_cost = &pentium_cost;
1793 /* Set by -mtune or -Os. */
1794 const struct processor_costs *ix86_cost = &pentium_cost;
1796 /* Processor feature/optimization bitmasks. */
1797 #define m_386 (1<<PROCESSOR_I386)
1798 #define m_486 (1<<PROCESSOR_I486)
1799 #define m_PENT (1<<PROCESSOR_PENTIUM)
1800 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1801 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1802 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1803 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1804 #define m_CORE2 (1<<PROCESSOR_CORE2)
1805 #define m_COREI7 (1<<PROCESSOR_COREI7)
1806 #define m_HASWELL (1<<PROCESSOR_HASWELL)
1807 #define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL)
1808 #define m_ATOM (1<<PROCESSOR_ATOM)
1809 #define m_SLM (1<<PROCESSOR_SLM)
1811 #define m_GEODE (1<<PROCESSOR_GEODE)
1812 #define m_K6 (1<<PROCESSOR_K6)
1813 #define m_K6_GEODE (m_K6 | m_GEODE)
1814 #define m_K8 (1<<PROCESSOR_K8)
1815 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1816 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1817 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1818 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1819 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1820 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
1821 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1822 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
1823 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3)
1824 #define m_BTVER (m_BTVER1 | m_BTVER2)
1825 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
1827 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1828 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1830 /* Generic instruction choice should be a common subset of supported CPUs
1831 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1832 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1834 /* Feature tests against the various tunings. */
1835 unsigned char ix86_tune_features[X86_TUNE_LAST];
1837 /* Feature tests against the various tunings used to create ix86_tune_features
1838 based on the processor mask. */
1839 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1840 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1841 negatively, so enabling it for Generic64 seems like a good code-size
1842 tradeoff.  We can't enable it for 32-bit generic because it does not
1843 work well with PPro-based chips. */
1844 m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1846 /* X86_TUNE_PUSH_MEMORY */
1847 m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1849 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1852 /* X86_TUNE_UNROLL_STRLEN */
1853 m_486 | m_PENT | m_PPRO | m_ATOM | m_SLM | m_CORE_ALL | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1855 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1856 on simulation result. But after P4 was made, no performance benefit
1857 was observed with branch hints. It also increases the code size.
1858 As a result, icc never generates branch hints. */
1861 /* X86_TUNE_DOUBLE_WITH_ADD */
1864 /* X86_TUNE_USE_SAHF */
1865 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC,
1867 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1868 partial dependencies. */
1869 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1871 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1872 register stalls on Generic32 compilation setting as well. However
1873 in current implementation the partial register stalls are not eliminated
1874 very well - they can be introduced via subregs synthesized by combine
1875 and can happen in caller/callee saving sequences. Because this option
1876 pays back little on PPro based chips and is in conflict with partial reg
1877 dependencies used by Athlon/P4 based chips, it is better to leave it off
1878 for generic32 for now. */
1881 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1882 m_CORE_ALL | m_GENERIC,
1884 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
1885 on 16-bit immediate moves into memory on Core2 and Corei7. */
1886 m_CORE_ALL | m_GENERIC,
1888 /* X86_TUNE_USE_HIMODE_FIOP */
1889 m_386 | m_486 | m_K6_GEODE,
1891 /* X86_TUNE_USE_SIMODE_FIOP */
1892 ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC),
1894 /* X86_TUNE_USE_MOV0 */
1897 /* X86_TUNE_USE_CLTD */
1898 ~(m_PENT | m_ATOM | m_SLM | m_K6),
1900 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1903 /* X86_TUNE_SPLIT_LONG_MOVES */
1906 /* X86_TUNE_READ_MODIFY_WRITE */
1909 /* X86_TUNE_READ_MODIFY */
1912 /* X86_TUNE_PROMOTE_QIMODE */
1913 m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1915 /* X86_TUNE_FAST_PREFIX */
1916 ~(m_386 | m_486 | m_PENT),
1918 /* X86_TUNE_SINGLE_STRINGOP */
1919 m_386 | m_P4_NOCONA,
1921 /* X86_TUNE_QIMODE_MATH */
1924 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1925 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1926 might be considered for Generic32 if our scheme for avoiding partial
1927 stalls was more effective. */
1930 /* X86_TUNE_PROMOTE_QI_REGS */
1933 /* X86_TUNE_PROMOTE_HI_REGS */
1936 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1937 over esp addition. */
1938 m_386 | m_486 | m_PENT | m_PPRO,
1940 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1941 over esp addition. */
1944 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1945 over esp subtraction. */
1946 m_386 | m_486 | m_PENT | m_K6_GEODE,
1948 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1949 over esp subtraction. */
1950 m_PENT | m_K6_GEODE,
1952 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1953 for DFmode copies */
1954 ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
1956 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1957 m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
1959 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1960 conflict here between PPro/Pentium4 based chips that treat 128bit
1961 SSE registers as single units and K8 based chips that divide SSE
1962 registers into two 64bit halves.  This knob promotes all store destinations
1963 to be 128bit to allow register renaming on 128bit SSE units, but usually
1964 results in one extra microop on 64bit SSE units.  Experimental results
1965 show that disabling this option on P4 brings over 20% SPECfp regression,
1966 while enabling it on K8 brings roughly 2.4% regression that can be partly
1967 masked by careful scheduling of moves. */
1968 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMDFAM10 | m_BDVER | m_GENERIC,
1970 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1971 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM,
1973 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1974 m_COREI7 | m_BDVER | m_SLM,
1976 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1979 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1980 are resolved on SSE register parts instead of whole registers, so we may
1981 maintain just the lower part of scalar values in the proper format, leaving
1982 the upper part undefined. */
1985 /* X86_TUNE_SSE_TYPELESS_STORES */
1988 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1989 m_PPRO | m_P4_NOCONA,
1991 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1992 m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
1994 /* X86_TUNE_PROLOGUE_USING_MOVE */
1995 m_PPRO | m_ATHLON_K8,
1997 /* X86_TUNE_EPILOGUE_USING_MOVE */
1998 m_PPRO | m_ATHLON_K8,
2000 /* X86_TUNE_SHIFT1 */
2003 /* X86_TUNE_USE_FFREEP */
2006 /* X86_TUNE_INTER_UNIT_MOVES */
2007 ~(m_AMD_MULTIPLE | m_GENERIC),
2009 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2010 ~(m_AMDFAM10 | m_BDVER ),
2012 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2013 than 4 branch instructions in the 16 byte window. */
2014 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
2016 /* X86_TUNE_SCHEDULE */
2017 m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2019 /* X86_TUNE_USE_BT */
2020 m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
2022 /* X86_TUNE_USE_INCDEC */
2023 ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GENERIC),
2025 /* X86_TUNE_PAD_RETURNS */
2026 m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC,
2028 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2031 /* X86_TUNE_EXT_80387_CONSTANTS */
2032 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2034 /* X86_TUNE_AVOID_VECTOR_DECODE */
2035 m_CORE_ALL | m_K8 | m_GENERIC64,
2037 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2038 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
2041 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2042 vector path on AMD machines. */
2043 m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64,
2045 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2047 m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64,
2049 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2053 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2054 but one byte longer. */
2057 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2058 operand that cannot be represented using a modRM byte. The XOR
2059 replacement is long decoded, so this split helps here as well. */
2062 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2064 m_CORE_ALL | m_AMDFAM10 | m_GENERIC,
2066 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2067 from integer to FP. */
2070 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2071 with a subsequent conditional jump instruction into a single
2072 compare-and-branch uop. */
2075 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2076 will impact LEA instruction selection. */
2079 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2083 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2084 at -O3.  For the moment, the prefetching seems badly tuned for Intel chips. */
2086 m_K6_GEODE | m_AMD_MULTIPLE,
2088 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2089 the auto-vectorizer. */
2092 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2093 during reassociation of integer computation. */
2096 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2097 during reassociation of fp computation. */
2098 m_ATOM | m_SLM | m_HASWELL,
2100 /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
2101 regs instead of memory. */
2104 /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
2105 a conditional move. */
2108 /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
2109 fp converts to destination register. */
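/* A minimal sketch of how the bitmasks above are consumed; assuming the
   derivation performed in ix86_option_override_internal, each X86_TUNE_*
   entry collapses to a plain boolean for the selected tuning target:

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   ix86_arch_features below is filled the same way from
   initial_ix86_arch_features using 1u << ix86_arch.  */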
2114 /* Feature tests against the various architecture variations. */
2115 unsigned char ix86_arch_features[X86_ARCH_LAST];
2117 /* Feature tests against the various architecture variations, used to create
2118 ix86_arch_features based on the processor mask. */
2119 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2120 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2121 ~(m_386 | m_486 | m_PENT | m_K6),
2123 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2126 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2129 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2132 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2136 static const unsigned int x86_accumulate_outgoing_args
2137 = m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC;
2139 static const unsigned int x86_arch_always_fancy_math_387
2140 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC;
2142 static const unsigned int x86_avx256_split_unaligned_load
2143 = m_COREI7 | m_GENERIC;
2145 static const unsigned int x86_avx256_split_unaligned_store
2146 = m_COREI7 | m_BDVER | m_GENERIC;
2148 /* In case the average insn count for a single function invocation is
2149 lower than this constant, emit a fast (but longer) prologue and epilogue. */
2151 #define FAST_PROLOGUE_INSN_COUNT 20
2153 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2154 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2155 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2156 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2158 /* Array of the smallest class containing reg number REGNO, indexed by
2159 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2161 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2163 /* ax, dx, cx, bx */
2164 AREG, DREG, CREG, BREG,
2165 /* si, di, bp, sp */
2166 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2168 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2169 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2172 /* flags, fpsr, fpcr, frame */
2173 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2175 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2178 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2181 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2182 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2183 /* SSE REX registers */
2184 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
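/* A sketch of how this table is consumed; the REGNO_REG_CLASS macro in
   i386.h is assumed to simply index it:

     #define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])  */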
2188 /* The "default" register map used in 32bit mode. */
2190 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2192 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2193 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2194 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2195 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2196 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2197 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2198 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2201 /* The "default" register map used in 64bit mode. */
2203 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2205 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2206 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2207 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2208 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2209 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2210 8,9,10,11,12,13,14,15, /* extended integer registers */
2211 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2214 /* Define the register numbers to be used in Dwarf debugging information.
2215 The SVR4 reference port C compiler uses the following register numbers
2216 in its Dwarf output code:
2217 0 for %eax (gcc regno = 0)
2218 1 for %ecx (gcc regno = 2)
2219 2 for %edx (gcc regno = 1)
2220 3 for %ebx (gcc regno = 3)
2221 4 for %esp (gcc regno = 7)
2222 5 for %ebp (gcc regno = 6)
2223 6 for %esi (gcc regno = 4)
2224 7 for %edi (gcc regno = 5)
2225 The following three DWARF register numbers are never generated by
2226 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2227 believes these numbers have these meanings.
2228 8 for %eip (no gcc equivalent)
2229 9 for %eflags (gcc regno = 17)
2230 10 for %trapno (no gcc equivalent)
2231 It is not at all clear how we should number the FP stack registers
2232 for the x86 architecture. If the version of SDB on x86/svr4 were
2233 a bit less brain dead with respect to floating-point then we would
2234 have a precedent to follow with respect to DWARF register numbers
2235 for x86 FP registers, but the SDB on x86/svr4 is so completely
2236 broken with respect to FP registers that it is hardly worth thinking
2237 of it as something to strive for compatibility with.
2238 The version of x86/svr4 SDB I have at the moment does (partially)
2239 seem to believe that DWARF register number 11 is associated with
2240 the x86 register %st(0), but that's about all. Higher DWARF
2241 register numbers don't seem to be associated with anything in
2242 particular, and even for DWARF regno 11, SDB only seems to under-
2243 stand that it should say that a variable lives in %st(0) (when
2244 asked via an `=' command) if we said it was in DWARF regno 11,
2245 but SDB still prints garbage when asked for the value of the
2246 variable in question (via a `/' command).
2247 (Also note that the labels SDB prints for various FP stack regs
2248 when doing an `x' command are all wrong.)
2249 Note that these problems generally don't affect the native SVR4
2250 C compiler because it doesn't allow the use of -O with -g and
2251 because when it is *not* optimizing, it allocates a memory
2252 location for each floating-point variable, and the memory
2253 location is what gets described in the DWARF AT_location
2254 attribute for the variable in question.
2255 Regardless of the severe mental illness of the x86/svr4 SDB, we
2256 do something sensible here and we use the following DWARF
2257 register numbers. Note that these are all stack-top-relative
2259 11 for %st(0) (gcc regno = 8)
2260 12 for %st(1) (gcc regno = 9)
2261 13 for %st(2) (gcc regno = 10)
2262 14 for %st(3) (gcc regno = 11)
2263 15 for %st(4) (gcc regno = 12)
2264 16 for %st(5) (gcc regno = 13)
2265 17 for %st(6) (gcc regno = 14)
2266 18 for %st(7) (gcc regno = 15)
2268 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2270 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2271 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2272 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2273 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2274 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2275 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2276 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
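/* A sketch of how the three maps above are selected, assuming the usual
   i386 target macros:

     #define DBX_REGISTER_NUMBER(N) \
       (TARGET_64BIT ? dbx64_register_map[N] : dbx_register_map[N])

   SVR4-flavored subtargets instead point the debug-output macros at
   svr4_dbx_register_map, which is what the long comment above
   documents.  */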
2279 /* Define parameter passing and return registers. */
2281 static int const x86_64_int_parameter_registers[6] =
2283 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2286 static int const x86_64_ms_abi_int_parameter_registers[4] =
2288 CX_REG, DX_REG, R8_REG, R9_REG
2291 static int const x86_64_int_return_registers[4] =
2293 AX_REG, DX_REG, DI_REG, SI_REG
2296 /* Define the structure for the machine field in struct function. */
2298 struct GTY(()) stack_local_entry {
2299 unsigned short mode;
2302 struct stack_local_entry *next;
2305 /* Structure describing stack frame layout.
2306 Stack grows downward:
2312 saved static chain if ix86_static_chain_on_stack
2314 saved frame pointer if frame_pointer_needed
2315 <- HARD_FRAME_POINTER
2321 <- sse_regs_save_offset
2324 [va_arg registers] |
2328 [padding2] | = to_allocate
2337 int outgoing_arguments_size;
2339 /* The offsets relative to ARG_POINTER. */
2340 HOST_WIDE_INT frame_pointer_offset;
2341 HOST_WIDE_INT hard_frame_pointer_offset;
2342 HOST_WIDE_INT stack_pointer_offset;
2343 HOST_WIDE_INT hfp_save_offset;
2344 HOST_WIDE_INT reg_save_offset;
2345 HOST_WIDE_INT sse_reg_save_offset;
2347 /* When save_regs_using_mov is set, emit prologue using
2348 move instead of push instructions. */
2349 bool save_regs_using_mov;
2352 /* Which cpu are we scheduling for. */
2353 enum attr_cpu ix86_schedule;
2355 /* Which cpu are we optimizing for. */
2356 enum processor_type ix86_tune;
2358 /* Which instruction set architecture to use. */
2359 enum processor_type ix86_arch;
2361 /* True if processor has SSE prefetch instruction. */
2362 unsigned char x86_prefetch_sse;
2364 /* -mstackrealign option */
2365 static const char ix86_force_align_arg_pointer_string[]
2366 = "force_align_arg_pointer";
2368 static rtx (*ix86_gen_leave) (void);
2369 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2370 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2371 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2372 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2373 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2374 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2375 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2376 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2377 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2378 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2379 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2381 /* Preferred alignment for stack boundary in bits. */
2382 unsigned int ix86_preferred_stack_boundary;
2384 /* Alignment for incoming stack boundary in bits specified at the command line. */
2386 static unsigned int ix86_user_incoming_stack_boundary;
2388 /* Default alignment for incoming stack boundary in bits. */
2389 static unsigned int ix86_default_incoming_stack_boundary;
2391 /* Alignment for incoming stack boundary in bits. */
2392 unsigned int ix86_incoming_stack_boundary;
2394 /* Calling abi specific va_list type nodes. */
2395 static GTY(()) tree sysv_va_list_type_node;
2396 static GTY(()) tree ms_va_list_type_node;
2398 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2399 char internal_label_prefix[16];
2400 int internal_label_prefix_len;
2402 /* Fence to use after loop using movnt. */
2405 /* Register class used for passing a given 64bit part of the argument.
2406 These represent classes as documented by the psABI, with the exception
2407 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2408 uses an SFmode or DFmode move instead of a DImode move to avoid reformatting
2410 penalties.  Similarly we play games with INTEGERSI_CLASS to use cheaper SImode
2411 moves whenever possible (the upper half does contain padding). */
2412 enum x86_64_reg_class
2415 X86_64_INTEGER_CLASS,
2416 X86_64_INTEGERSI_CLASS,
2423 X86_64_COMPLEX_X87_CLASS,
2427 #define MAX_CLASSES 4
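/* A sketch of how these classes are used: classify_argument, defined
   later in this file, has roughly the signature

     static int classify_argument (enum machine_mode, const_tree,
                                   enum x86_64_reg_class [MAX_CLASSES],
                                   int);

   It fills one class per eightbyte of the argument and returns the
   number of eightbytes used, with 0 meaning "pass in memory".  */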
2429 /* Table of constants used by fldpi, fldln2, etc. */
2430 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2431 static bool ext_80387_constants_init = 0;
2434 static struct machine_function * ix86_init_machine_status (void);
2435 static rtx ix86_function_value (const_tree, const_tree, bool);
2436 static bool ix86_function_value_regno_p (const unsigned int);
2437 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2439 static rtx ix86_static_chain (const_tree, bool);
2440 static int ix86_function_regparm (const_tree, const_tree);
2441 static void ix86_compute_frame_layout (struct ix86_frame *);
2442 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2444 static void ix86_add_new_builtins (HOST_WIDE_INT);
2445 static tree ix86_canonical_va_list_type (tree);
2446 static void predict_jump (int);
2447 static unsigned int split_stack_prologue_scratch_regno (void);
2448 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2450 enum ix86_function_specific_strings
2452 IX86_FUNCTION_SPECIFIC_ARCH,
2453 IX86_FUNCTION_SPECIFIC_TUNE,
2454 IX86_FUNCTION_SPECIFIC_MAX
2457 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2458 const char *, enum fpmath_unit, bool);
2459 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2460 static void ix86_function_specific_save (struct cl_target_option *);
2461 static void ix86_function_specific_restore (struct cl_target_option *);
2462 static void ix86_function_specific_print (FILE *, int,
2463 struct cl_target_option *);
2464 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2465 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2466 struct gcc_options *);
2467 static bool ix86_can_inline_p (tree, tree);
2468 static void ix86_set_current_function (tree);
2469 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2471 static enum calling_abi ix86_function_abi (const_tree);
2474 #ifndef SUBTARGET32_DEFAULT_CPU
2475 #define SUBTARGET32_DEFAULT_CPU "i386"
2478 /* Whether -mtune= or -march= were specified */
2479 static int ix86_tune_defaulted;
2480 static int ix86_arch_specified;
2482 /* Vectorization library interface and handlers. */
2483 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2485 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2486 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2488 /* Processor target table, indexed by processor number */
2491 const struct processor_costs *cost; /* Processor costs */
2492 const int align_loop; /* Default alignments. */
2493 const int align_loop_max_skip;
2494 const int align_jump;
2495 const int align_jump_max_skip;
2496 const int align_func;
2499 static const struct ptt processor_target_table[PROCESSOR_max] =
2501 {&i386_cost, 4, 3, 4, 3, 4},
2502 {&i486_cost, 16, 15, 16, 15, 16},
2503 {&pentium_cost, 16, 7, 16, 7, 16},
2504 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2505 {&geode_cost, 0, 0, 0, 0, 0},
2506 {&k6_cost, 32, 7, 32, 7, 32},
2507 {&athlon_cost, 16, 7, 16, 7, 16},
2508 {&pentium4_cost, 0, 0, 0, 0, 0},
2509 {&k8_cost, 16, 7, 16, 7, 16},
2510 {&nocona_cost, 0, 0, 0, 0, 0},
2512 {&core_cost, 16, 10, 16, 10, 16},
2514 {&core_cost, 16, 10, 16, 10, 16},
2516 {&core_cost, 16, 10, 16, 10, 16},
2517 {&generic32_cost, 16, 7, 16, 7, 16},
2518 {&generic64_cost, 16, 10, 16, 10, 16},
2519 {&amdfam10_cost, 32, 24, 32, 7, 32},
2520 {&bdver1_cost, 32, 24, 32, 7, 32},
2521 {&bdver2_cost, 32, 24, 32, 7, 32},
2522 {&bdver3_cost, 32, 24, 32, 7, 32},
2523 {&btver1_cost, 32, 24, 32, 7, 32},
2524 {&btver2_cost, 32, 24, 32, 7, 32},
2525 {&atom_cost, 16, 15, 16, 7, 16},
2526 {&slm_cost, 16, 15, 16, 7, 16}
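/* A minimal usage sketch, assuming the indexing done in
   ix86_option_override_internal below:

     ix86_tune_cost = processor_target_table[ix86_tune].cost;
     if (align_loops == 0)
       align_loops = processor_target_table[ix86_tune].align_loop;  */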
2529 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2564 gate_insert_vzeroupper (void)
2566 return TARGET_VZEROUPPER;
2570 rest_of_handle_insert_vzeroupper (void)
2574 /* vzeroupper instructions are inserted immediately after reload to
2575 account for possible spills from 256bit registers.  The pass
2576 reuses the mode switching infrastructure by re-running the mode insertion
2577 pass, so disable entities that have already been processed. */
2578 for (i = 0; i < MAX_386_ENTITIES; i++)
2579 ix86_optimize_mode_switching[i] = 0;
2581 ix86_optimize_mode_switching[AVX_U128] = 1;
2583 /* Call optimize_mode_switching. */
2584 pass_mode_switching.pass.execute ();
2588 struct rtl_opt_pass pass_insert_vzeroupper =
2592 "vzeroupper", /* name */
2593 OPTGROUP_NONE, /* optinfo_flags */
2594 gate_insert_vzeroupper, /* gate */
2595 rest_of_handle_insert_vzeroupper, /* execute */
2598 0, /* static_pass_number */
2599 TV_NONE, /* tv_id */
2600 0, /* properties_required */
2601 0, /* properties_provided */
2602 0, /* properties_destroyed */
2603 0, /* todo_flags_start */
2604 TODO_df_finish | TODO_verify_rtl_sharing |
2605 0, /* todo_flags_finish */
2609 /* Return true if a red-zone is in use. */
2612 ix86_using_red_zone (void)
2614 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2617 /* Return a string that documents the current -m options. The caller is
2618 responsible for freeing the string. */
2621 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2622 const char *tune, enum fpmath_unit fpmath,
2625 struct ix86_target_opts
2627 const char *option; /* option string */
2628 HOST_WIDE_INT mask; /* isa mask options */
2631 /* This table is ordered so that options like -msse4.2 that imply
2632 preceding options will match those first. */
2633 static struct ix86_target_opts isa_opts[] =
2635 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2636 { "-mfma", OPTION_MASK_ISA_FMA },
2637 { "-mxop", OPTION_MASK_ISA_XOP },
2638 { "-mlwp", OPTION_MASK_ISA_LWP },
2639 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2640 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2641 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2642 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2643 { "-msse3", OPTION_MASK_ISA_SSE3 },
2644 { "-msse2", OPTION_MASK_ISA_SSE2 },
2645 { "-msse", OPTION_MASK_ISA_SSE },
2646 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2647 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2648 { "-mmmx", OPTION_MASK_ISA_MMX },
2649 { "-mabm", OPTION_MASK_ISA_ABM },
2650 { "-mbmi", OPTION_MASK_ISA_BMI },
2651 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2652 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2653 { "-mhle", OPTION_MASK_ISA_HLE },
2654 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2655 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2656 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2657 { "-madx", OPTION_MASK_ISA_ADX },
2658 { "-mtbm", OPTION_MASK_ISA_TBM },
2659 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2660 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2661 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2662 { "-maes", OPTION_MASK_ISA_AES },
2663 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2664 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2665 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2666 { "-mf16c", OPTION_MASK_ISA_F16C },
2667 { "-mrtm", OPTION_MASK_ISA_RTM },
2668 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2669 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2673 static struct ix86_target_opts flag_opts[] =
2675 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2676 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2677 { "-m80387", MASK_80387 },
2678 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2679 { "-malign-double", MASK_ALIGN_DOUBLE },
2680 { "-mcld", MASK_CLD },
2681 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2682 { "-mieee-fp", MASK_IEEE_FP },
2683 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2684 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2685 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2686 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2687 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2688 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2689 { "-mno-red-zone", MASK_NO_RED_ZONE },
2690 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2691 { "-mrecip", MASK_RECIP },
2692 { "-mrtd", MASK_RTD },
2693 { "-msseregparm", MASK_SSEREGPARM },
2694 { "-mstack-arg-probe", MASK_STACK_PROBE },
2695 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2696 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2697 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2698 { "-mvzeroupper", MASK_VZEROUPPER },
2699 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2700 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2701 { "-mprefer-avx128", MASK_PREFER_AVX128},
2704 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2707 char target_other[40];
2717 memset (opts, '\0', sizeof (opts));
2719 /* Add -march= option. */
2722 opts[num][0] = "-march=";
2723 opts[num++][1] = arch;
2726 /* Add -mtune= option. */
2729 opts[num][0] = "-mtune=";
2730 opts[num++][1] = tune;
2733 /* Add -m32/-m64/-mx32. */
2734 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2736 if ((isa & OPTION_MASK_ABI_64) != 0)
2740 isa &= ~ (OPTION_MASK_ISA_64BIT
2741 | OPTION_MASK_ABI_64
2742 | OPTION_MASK_ABI_X32);
2746 opts[num++][0] = abi;
2748 /* Pick out the options in isa options. */
2749 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2751 if ((isa & isa_opts[i].mask) != 0)
2753 opts[num++][0] = isa_opts[i].option;
2754 isa &= ~ isa_opts[i].mask;
2758 if (isa && add_nl_p)
2760 opts[num++][0] = isa_other;
2761 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2765 /* Add flag options. */
2766 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2768 if ((flags & flag_opts[i].mask) != 0)
2770 opts[num++][0] = flag_opts[i].option;
2771 flags &= ~ flag_opts[i].mask;
2775 if (flags && add_nl_p)
2777 opts[num++][0] = target_other;
2778 sprintf (target_other, "(other flags: %#x)", flags);
2781 /* Add -fpmath= option. */
2784 opts[num][0] = "-mfpmath=";
2785 switch ((int) fpmath)
2788 opts[num++][1] = "387";
2792 opts[num++][1] = "sse";
2795 case FPMATH_387 | FPMATH_SSE:
2796 opts[num++][1] = "sse+387";
2808 gcc_assert (num < ARRAY_SIZE (opts));
2810 /* Size the string. */
2812 sep_len = (add_nl_p) ? 3 : 1;
2813 for (i = 0; i < num; i++)
2816 for (j = 0; j < 2; j++)
2818 len += strlen (opts[i][j]);
2821 /* Build the string. */
2822 ret = ptr = (char *) xmalloc (len);
2825 for (i = 0; i < num; i++)
2829 for (j = 0; j < 2; j++)
2830 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2837 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2845 for (j = 0; j < 2; j++)
2848 memcpy (ptr, opts[i][j], len2[j]);
2850 line_len += len2[j];
2855 gcc_assert (ret + len >= ptr);
2860 /* Return true if profiling code should be emitted before the
2861 prologue, and false otherwise.
2862 Note: on x86 this is the case when -mfentry ("hotfix") is in effect. */
2864 ix86_profile_before_prologue (void)
2866 return flag_fentry != 0;
2869 /* Function that is callable from the debugger to print the current
2872 ix86_debug_options (void)
2874 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2875 ix86_arch_string, ix86_tune_string,
2880 fprintf (stderr, "%s\n\n", opts);
2884 fputs ("<no options>\n\n", stderr);
2889 /* Override various settings based on options. If MAIN_ARGS_P, the
2890 options are from the command line, otherwise they are from
2894 ix86_option_override_internal (bool main_args_p)
2897 unsigned int ix86_arch_mask, ix86_tune_mask;
2898 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2903 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2904 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2905 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2906 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2907 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2908 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2909 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2910 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2911 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2912 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2913 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2914 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2915 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2916 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2917 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2918 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2919 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2920 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2921 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2922 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2923 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2924 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2925 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2926 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2927 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2928 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2929 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2930 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2931 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2932 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2933 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2934 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2935 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
2936 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
2937 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
2938 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
2939 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
2940 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
2941 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
2942 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
2944 /* If this reaches 64, we need to widen the struct pta flags below. */
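/* A sketch of how the PTA_* bits are consumed once an -march= string
   matches an entry of processor_alias_table below, in the style used in
   ix86_option_override_internal:

     if (processor_alias_table[i].flags & PTA_SSE2
         && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
       ix86_isa_flags |= OPTION_MASK_ISA_SSE2;

   i.e. each architecture bit enables the corresponding ISA option unless
   the user already set that option explicitly.  */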
2948 const char *const name; /* processor name or nickname. */
2949 const enum processor_type processor;
2950 const enum attr_cpu schedule;
2951 const unsigned HOST_WIDE_INT flags;
2953 const processor_alias_table[] =
2955 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2956 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2957 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2958 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2959 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2960 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2961 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
2962 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
2963 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2964 PTA_MMX | PTA_SSE | PTA_FXSR},
2965 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2966 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2967 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
2968 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2969 PTA_MMX | PTA_SSE | PTA_FXSR},
2970 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2971 PTA_MMX | PTA_SSE | PTA_FXSR},
2972 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2973 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
2974 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2975 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
2976 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2977 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
2978 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2979 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
2980 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2981 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2982 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
2983 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2984 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2985 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR},
2986 {"corei7", PROCESSOR_COREI7, CPU_COREI7,
2987 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
2988 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_POPCNT | PTA_FXSR},
2989 {"corei7-avx", PROCESSOR_COREI7, CPU_COREI7,
2990 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2991 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2992 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL
2993 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
2994 {"core-avx-i", PROCESSOR_COREI7, CPU_COREI7,
2995 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2996 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2997 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2998 | PTA_RDRND | PTA_F16C | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
2999 {"core-avx2", PROCESSOR_HASWELL, CPU_COREI7,
3000 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3001 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
3002 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
3003 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
3004 | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE | PTA_FXSR | PTA_XSAVE
3006 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3007 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3008 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE | PTA_FXSR},
3009 {"slm", PROCESSOR_SLM, CPU_SLM,
3010 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3011 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_MOVBE
3013 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3014 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3015 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3016 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3017 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3018 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3019 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3020 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3021 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3022 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3023 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3024 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3025 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3026 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3027 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3028 {"x86-64", PROCESSOR_K8, CPU_K8,
3029 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3030 {"k8", PROCESSOR_K8, CPU_K8,
3031 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3032 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3033 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3034 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3035 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3036 {"opteron", PROCESSOR_K8, CPU_K8,
3037 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3038 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3039 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3040 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3041 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3042 {"athlon64", PROCESSOR_K8, CPU_K8,
3043 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3044 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3045 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3046 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3047 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3048 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3049 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3050 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3051 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3052 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3053 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3054 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3055 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3056 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3057 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3058 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3059 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3060 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3061 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3062 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3063 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3064 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3065 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3066 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3067 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3068 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3069 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3070 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3071 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3072 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3073 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3075 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3076 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3077 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3078 | PTA_FXSR | PTA_XSAVE},
3079 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3080 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3081 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3082 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3083 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3084 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3086 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3087 PTA_HLE /* flags are only used for -march switch. */ },
3088 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3090 | PTA_HLE /* flags are only used for -march switch. */ },
3093 /* -mrecip options. */
3096 const char *string; /* option name */
3097 unsigned int mask; /* mask bits to set */
3099 const recip_options[] =
3101 { "all", RECIP_MASK_ALL },
3102 { "none", RECIP_MASK_NONE },
3103 { "div", RECIP_MASK_DIV },
3104 { "sqrt", RECIP_MASK_SQRT },
3105 { "vec-div", RECIP_MASK_VEC_DIV },
3106 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3109 int const pta_size = ARRAY_SIZE (processor_alias_table);
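/* A sketch of how the recip_options table above is matched against each
   comma-separated -mrecip= element later in this function (a leading '!'
   on an option name clears the mask bits instead of setting them):

     for (i = 0; i < ARRAY_SIZE (recip_options); i++)
       if (!strcmp (str, recip_options[i].string))
         {
           recip_mask |= recip_options[i].mask;
           break;
         }
*/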
3111 /* Set up prefix/suffix so the error messages refer to either the command
3112 line argument, or the attribute(target). */
3121 prefix = "option(\"";
3126 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3127 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3128 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT)
3129 ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3130 #ifdef TARGET_BI_ARCH
3133 #if TARGET_BI_ARCH == 1
3134 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3135 is on and OPTION_MASK_ABI_X32 is off. We turn off
3136 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3137 -mx32. */
3138 if (TARGET_X32)
3139 ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3141 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3142 on and OPTION_MASK_ABI_64 is off. We turn off
3143 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3144 -m64. */
3145 if (TARGET_LP64)
3146 ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3153 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3154 OPTION_MASK_ABI_64 for TARGET_X32. */
3155 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3156 ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3158 else if (TARGET_LP64)
3160 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3161 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3162 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3163 ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3166 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3167 SUBTARGET_OVERRIDE_OPTIONS;
3170 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3171 SUBSUBTARGET_OVERRIDE_OPTIONS;
3174 /* -fPIC is the default for x86_64. */
3175 if (TARGET_MACHO && TARGET_64BIT)
3178 /* Need to check -mtune=generic first. */
3179 if (ix86_tune_string)
3181 if (!strcmp (ix86_tune_string, "generic")
3182 || !strcmp (ix86_tune_string, "i686")
3183 /* As special support for cross compilers we read -mtune=native
3184 as -mtune=generic. With native compilers we won't see the
3185 -mtune=native, as it was changed by the driver. */
3186 || !strcmp (ix86_tune_string, "native"))
3189 ix86_tune_string = "generic64";
3191 ix86_tune_string = "generic32";
3193 /* If this call is for setting the option attribute, allow the
3194 generic32/generic64 that was previously set. */
3195 else if (!main_args_p
3196 && (!strcmp (ix86_tune_string, "generic32")
3197 || !strcmp (ix86_tune_string, "generic64")))
3199 else if (!strncmp (ix86_tune_string, "generic", 7))
3200 error ("bad value (%s) for %stune=%s %s",
3201 ix86_tune_string, prefix, suffix, sw);
3202 else if (!strcmp (ix86_tune_string, "x86-64"))
3203 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3204 "%stune=k8%s or %stune=generic%s instead as appropriate",
3205 prefix, suffix, prefix, suffix, prefix, suffix);
3209 if (ix86_arch_string)
3210 ix86_tune_string = ix86_arch_string;
3211 if (!ix86_tune_string)
3213 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3214 ix86_tune_defaulted = 1;
3217 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3218 need to use a sensible tune option. */
3219 if (!strcmp (ix86_tune_string, "generic")
3220 || !strcmp (ix86_tune_string, "x86-64")
3221 || !strcmp (ix86_tune_string, "i686"))
3224 ix86_tune_string = "generic64";
3226 ix86_tune_string = "generic32";
3230 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3232 /* rep; movq isn't available in 32-bit code. */
3233 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3234 ix86_stringop_alg = no_stringop;
3237 if (!ix86_arch_string)
3238 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3239 else
3240 ix86_arch_specified = 1;
3242 if (global_options_set.x_ix86_pmode)
3244 if ((TARGET_LP64 && ix86_pmode == PMODE_SI)
3245 || (!TARGET_64BIT && ix86_pmode == PMODE_DI))
3246 error ("address mode %qs not supported in the %s bit mode",
3247 TARGET_64BIT ? "short" : "long",
3248 TARGET_64BIT ? "64" : "32");
3251 ix86_pmode = TARGET_LP64 ? PMODE_DI : PMODE_SI;
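/* Usage example (illustrative): under x32 (-mx32, ILP32 pointers on a
   64-bit target), -maddress-mode=long selects PMODE_DI and
   -maddress-mode=short selects PMODE_SI; the checks above reject
   PMODE_SI under LP64 and PMODE_DI in plain 32-bit mode.  */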
3253 if (!global_options_set.x_ix86_abi)
3254 ix86_abi = DEFAULT_ABI;
3256 if (global_options_set.x_ix86_cmodel)
3258 switch (ix86_cmodel)
3263 ix86_cmodel = CM_SMALL_PIC;
3265 error ("code model %qs not supported in the %s bit mode",
3272 ix86_cmodel = CM_MEDIUM_PIC;
3274 error ("code model %qs not supported in the %s bit mode",
3276 else if (TARGET_X32)
3277 error ("code model %qs not supported in x32 mode",
3284 ix86_cmodel = CM_LARGE_PIC;
3286 error ("code model %qs not supported in the %s bit mode",
3288 else if (TARGET_X32)
3289 error ("code model %qs not supported in x32 mode",
3295 error ("code model %s does not support PIC mode", "32");
3297 error ("code model %qs not supported in the %s bit mode",
3304 error ("code model %s does not support PIC mode", "kernel");
3305 ix86_cmodel = CM_32;
3308 error ("code model %qs not supported in the %s bit mode",
3318 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3319 use of rip-relative addressing. This eliminates fixups that
3320 would otherwise be needed if this object is to be placed in a
3321 DLL, and is essentially just as efficient as direct addressing. */
3322 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3323 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3324 else if (TARGET_64BIT && TARGET_RDOS)
3325 ix86_cmodel = CM_MEDIUM_PIC, flag_pic = 1;
3326 else if (TARGET_64BIT)
3327 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3329 ix86_cmodel = CM_32;
3331 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3333 error ("-masm=intel not supported in this configuration");
3334 ix86_asm_dialect = ASM_ATT;
3336 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3337 sorry ("%i-bit mode not compiled in",
3338 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3340 for (i = 0; i < pta_size; i++)
3341 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3343 ix86_schedule = processor_alias_table[i].schedule;
3344 ix86_arch = processor_alias_table[i].processor;
3345 /* Default cpu tuning to the architecture. */
3346 ix86_tune = ix86_arch;
3348 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3349 error ("CPU you selected does not support x86-64 "
3352 if (processor_alias_table[i].flags & PTA_MMX
3353 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3354 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3355 if (processor_alias_table[i].flags & PTA_3DNOW
3356 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3357 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3358 if (processor_alias_table[i].flags & PTA_3DNOW_A
3359 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3360 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3361 if (processor_alias_table[i].flags & PTA_SSE
3362 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3363 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3364 if (processor_alias_table[i].flags & PTA_SSE2
3365 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3366 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3367 if (processor_alias_table[i].flags & PTA_SSE3
3368 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3369 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3370 if (processor_alias_table[i].flags & PTA_SSSE3
3371 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3372 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3373 if (processor_alias_table[i].flags & PTA_SSE4_1
3374 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3375 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3376 if (processor_alias_table[i].flags & PTA_SSE4_2
3377 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3378 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3379 if (processor_alias_table[i].flags & PTA_AVX
3380 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3381 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3382 if (processor_alias_table[i].flags & PTA_AVX2
3383 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3384 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3385 if (processor_alias_table[i].flags & PTA_FMA
3386 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3387 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3388 if (processor_alias_table[i].flags & PTA_SSE4A
3389 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3390 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3391 if (processor_alias_table[i].flags & PTA_FMA4
3392 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3393 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3394 if (processor_alias_table[i].flags & PTA_XOP
3395 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3396 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3397 if (processor_alias_table[i].flags & PTA_LWP
3398 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3399 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3400 if (processor_alias_table[i].flags & PTA_ABM
3401 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3402 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3403 if (processor_alias_table[i].flags & PTA_BMI
3404 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3405 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3406 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3407 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3408 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3409 if (processor_alias_table[i].flags & PTA_TBM
3410 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3411 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3412 if (processor_alias_table[i].flags & PTA_BMI2
3413 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3414 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3415 if (processor_alias_table[i].flags & PTA_CX16
3416 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3417 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3418 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3419 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3420 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3421 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3422 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3423 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3424 if (processor_alias_table[i].flags & PTA_MOVBE
3425 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3426 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3427 if (processor_alias_table[i].flags & PTA_AES
3428 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3429 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3430 if (processor_alias_table[i].flags & PTA_PCLMUL
3431 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3432 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3433 if (processor_alias_table[i].flags & PTA_FSGSBASE
3434 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3435 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3436 if (processor_alias_table[i].flags & PTA_RDRND
3437 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3438 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3439 if (processor_alias_table[i].flags & PTA_F16C
3440 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3441 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3442 if (processor_alias_table[i].flags & PTA_RTM
3443 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3444 ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3445 if (processor_alias_table[i].flags & PTA_HLE
3446 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3447 ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3448 if (processor_alias_table[i].flags & PTA_PRFCHW
3449 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3450 ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3451 if (processor_alias_table[i].flags & PTA_RDSEED
3452 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3453 ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3454 if (processor_alias_table[i].flags & PTA_ADX
3455 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3456 ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3457 if (processor_alias_table[i].flags & PTA_FXSR
3458 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3459 ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3460 if (processor_alias_table[i].flags & PTA_XSAVE
3461 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3462 ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3463 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3464 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3465 ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3466 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3467 x86_prefetch_sse = true;
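#if 0
/* Illustrative sketch only, not part of GCC: every arm of the if-chain
   above applies the same pattern (copy a PTA_* capability into
   ix86_isa_flags unless the user set that ISA explicitly), so it could
   equally be driven by a table.  The pta_isa_map table, its field
   names, and apply_pta_flags are hypothetical.  */
static const struct { HOST_WIDE_INT pta; HOST_WIDE_INT isa; } pta_isa_map[] =
{
  { PTA_MMX,  OPTION_MASK_ISA_MMX  },
  { PTA_SSE,  OPTION_MASK_ISA_SSE  },
  { PTA_SSE2, OPTION_MASK_ISA_SSE2 },
  /* ... one entry per PTA_* / OPTION_MASK_ISA_* pair ... */
};

static void
apply_pta_flags (HOST_WIDE_INT flags)
{
  size_t j;
  for (j = 0; j < ARRAY_SIZE (pta_isa_map); j++)
    if ((flags & pta_isa_map[j].pta)
	&& !(ix86_isa_flags_explicit & pta_isa_map[j].isa))
      ix86_isa_flags |= pta_isa_map[j].isa;
}
#endif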
3472 if (!strcmp (ix86_arch_string, "generic"))
3473 error ("generic CPU can be used only for %stune=%s %s",
3474 prefix, suffix, sw);
3475 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3476 error ("bad value (%s) for %sarch=%s %s",
3477 ix86_arch_string, prefix, suffix, sw);
3479 ix86_arch_mask = 1u << ix86_arch;
3480 for (i = 0; i < X86_ARCH_LAST; ++i)
3481 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
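/* Worked example (illustrative): if ix86_arch is PROCESSOR_K8, then
   ix86_arch_mask == (1u << PROCESSOR_K8), and feature i is enabled iff
   that bit is set in initial_ix86_arch_features[i]; the double negation
   collapses the masked value to exactly 0 or 1.  */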
3483 for (i = 0; i < pta_size; i++)
3484 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3486 ix86_schedule = processor_alias_table[i].schedule;
3487 ix86_tune = processor_alias_table[i].processor;
3490 if (!(processor_alias_table[i].flags & PTA_64BIT))
3492 if (ix86_tune_defaulted)
3494 ix86_tune_string = "x86-64";
3495 for (i = 0; i < pta_size; i++)
3496 if (! strcmp (ix86_tune_string,
3497 processor_alias_table[i].name))
3499 ix86_schedule = processor_alias_table[i].schedule;
3500 ix86_tune = processor_alias_table[i].processor;
3503 error ("CPU you selected does not support x86-64 "
3509 /* Adjust tuning when compiling for 32-bit ABI. */
3512 case PROCESSOR_GENERIC64:
3513 ix86_tune = PROCESSOR_GENERIC32;
3514 ix86_schedule = CPU_PENTIUMPRO;
3521 /* Intel CPUs have always interpreted SSE prefetch instructions as
3522 NOPs; so, we can enable SSE prefetch instructions even when
3523 -mtune (rather than -march) points us to a processor that has them.
3524 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3525 higher processors. */
3527 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3528 x86_prefetch_sse = true;
3532 if (ix86_tune_specified && i == pta_size)
3533 error ("bad value (%s) for %stune=%s %s",
3534 ix86_tune_string, prefix, suffix, sw);
3536 ix86_tune_mask = 1u << ix86_tune;
3537 for (i = 0; i < X86_TUNE_LAST; ++i)
3538 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3540 #ifndef USE_IX86_FRAME_POINTER
3541 #define USE_IX86_FRAME_POINTER 0
3542 #endif
3544 #ifndef USE_X86_64_FRAME_POINTER
3545 #define USE_X86_64_FRAME_POINTER 0
3546 #endif
3548 /* Set the default values for switches whose default depends on TARGET_64BIT
3549 in case they weren't overwritten by command line options. */
3550 if (TARGET_64BIT)
3551 {
3552 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3553 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3554 if (flag_asynchronous_unwind_tables == 2)
3555 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3556 if (flag_pcc_struct_return == 2)
3557 flag_pcc_struct_return = 0;
3558 }
3559 else
3560 {
3561 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3562 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3563 if (flag_asynchronous_unwind_tables == 2)
3564 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3565 if (flag_pcc_struct_return == 2)
3566 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3567 }
3569 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3571 ix86_cost = &ix86_size_cost;
3573 ix86_cost = ix86_tune_cost;
3575 /* Arrange to set up i386_stack_locals for all functions. */
3576 init_machine_status = ix86_init_machine_status;
3578 /* Validate -mregparm= value. */
3579 if (global_options_set.x_ix86_regparm)
3582 warning (0, "-mregparm is ignored in 64-bit mode");
3583 if (ix86_regparm > REGPARM_MAX)
3585 error ("-mregparm=%d is not between 0 and %d",
3586 ix86_regparm, REGPARM_MAX);
3587 ix86_regparm = 0;
3588 }
3589 }
3590 if (TARGET_64BIT)
3591 ix86_regparm = REGPARM_MAX;
3593 /* Default align_* from the processor table. */
3594 if (align_loops == 0)
3596 align_loops = processor_target_table[ix86_tune].align_loop;
3597 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3599 if (align_jumps == 0)
3601 align_jumps = processor_target_table[ix86_tune].align_jump;
3602 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3604 if (align_functions == 0)
3606 align_functions = processor_target_table[ix86_tune].align_func;
3609 /* Provide default for -mbranch-cost= value. */
3610 if (!global_options_set.x_ix86_branch_cost)
3611 ix86_branch_cost = ix86_cost->branch_cost;
3613 if (TARGET_64BIT)
3614 {
3615 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3617 /* Enable by default the SSE and MMX builtins. Do allow the user to
3618 explicitly disable any of these. In particular, disabling SSE and
3619 MMX for kernel code is extremely useful. */
3620 if (!ix86_arch_specified)
3621 ix86_isa_flags
3622 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3623 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3626 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3627 }
3628 else
3629 {
3630 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3632 if (!ix86_arch_specified)
3633 ix86_isa_flags
3634 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3636 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3637 when the programmer takes care to keep the stack from being destroyed. */
3638 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3639 target_flags |= MASK_NO_RED_ZONE;
3642 /* Keep nonleaf frame pointers. */
3643 if (flag_omit_frame_pointer)
3644 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3645 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3646 flag_omit_frame_pointer = 1;
3648 /* If we're doing fast math, we don't care about comparison order
3649 wrt NaNs. This lets us use a shorter comparison sequence. */
3650 if (flag_finite_math_only)
3651 target_flags &= ~MASK_IEEE_FP;
3653 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3654 since the insns won't need emulation. */
3655 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3656 target_flags &= ~MASK_NO_FANCY_MATH_387;
3658 /* Likewise, if the target doesn't have a 387, or we've specified
3659 software floating point, don't use 387 inline intrinsics. */
3660 if (!TARGET_80387)
3661 target_flags |= MASK_NO_FANCY_MATH_387;
3663 /* Turn on MMX builtins for -msse. */
3664 if (TARGET_SSE)
3665 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3667 /* Enable SSE prefetch. */
3668 if (TARGET_SSE || (TARGET_PRFCHW && !TARGET_3DNOW))
3669 x86_prefetch_sse = true;
3671 /* Enable prefetch{,w} instructions for -m3dnow. */
3672 if (TARGET_3DNOW)
3673 ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW & ~ix86_isa_flags_explicit;
3675 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3676 if (TARGET_SSE4_2 || TARGET_ABM)
3677 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3679 /* Enable lzcnt instruction for -mabm. */
3680 if (TARGET_ABM)
3681 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3683 /* Validate -mpreferred-stack-boundary= value or default it to
3684 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3685 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3686 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3688 int min = (TARGET_64BIT ? (TARGET_SSE ? 4 : 3) : 2);
3689 int max = (TARGET_SEH ? 4 : 12);
3691 if (ix86_preferred_stack_boundary_arg < min
3692 || ix86_preferred_stack_boundary_arg > max)
3695 error ("-mpreferred-stack-boundary is not supported "
3698 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3699 ix86_preferred_stack_boundary_arg, min, max);
3702 ix86_preferred_stack_boundary
3703 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3706 /* Set the default value for -mstackrealign. */
3707 if (ix86_force_align_arg_pointer == -1)
3708 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3710 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3712 /* Validate -mincoming-stack-boundary= value or default it to
3713 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3714 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3715 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3717 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3718 || ix86_incoming_stack_boundary_arg > 12)
3719 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3720 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3721 else
3722 {
3723 ix86_user_incoming_stack_boundary
3724 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3725 ix86_incoming_stack_boundary
3726 = ix86_user_incoming_stack_boundary;
3730 /* Accept -msseregparm only if at least SSE support is enabled. */
3731 if (TARGET_SSEREGPARM
3733 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3735 if (global_options_set.x_ix86_fpmath)
3737 if (ix86_fpmath & FPMATH_SSE)
3741 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3742 ix86_fpmath = FPMATH_387;
3744 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3746 warning (0, "387 instruction set disabled, using SSE arithmetics");
3747 ix86_fpmath = FPMATH_SSE;
3751 else
3752 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3754 /* If the i387 is disabled, then do not return values in it. */
3756 target_flags &= ~MASK_FLOAT_RETURNS;
3758 /* Use external vectorized library in vectorizing intrinsics. */
3759 if (global_options_set.x_ix86_veclibabi_type)
3760 switch (ix86_veclibabi_type)
3762 case ix86_veclibabi_type_svml:
3763 ix86_veclib_handler = ix86_veclibabi_svml;
3764 break;
3766 case ix86_veclibabi_type_acml:
3767 ix86_veclib_handler = ix86_veclibabi_acml;
3768 break;
3770 default:
3771 gcc_unreachable ();
3772 }
3774 if ((!USE_IX86_FRAME_POINTER
3775 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3776 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3777 && !optimize_size)
3778 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3780 /* ??? Unwind info is not correct around the CFG unless either a frame
3781 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3782 unwind info generation to be aware of the CFG and propagating states
3783 around edges. */
3784 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3785 || flag_exceptions || flag_non_call_exceptions)
3786 && flag_omit_frame_pointer
3787 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3789 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3790 warning (0, "unwind tables currently require either a frame pointer "
3791 "or %saccumulate-outgoing-args%s for correctness",
3793 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3796 /* If stack probes are required, the space used for large function
3797 arguments on the stack must also be probed, so enable
3798 -maccumulate-outgoing-args so this happens in the prologue. */
3799 if (TARGET_STACK_PROBE
3800 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3802 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3803 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3804 "for correctness", prefix, suffix);
3805 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3808 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3811 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3812 p = strchr (internal_label_prefix, 'X');
3813 internal_label_prefix_len = p - internal_label_prefix;
3817 /* When the scheduling description is not available, disable the scheduler
3818 pass so it won't slow down compilation and make x87 code slower. */
3819 if (!TARGET_SCHEDULE)
3820 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3822 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3823 ix86_tune_cost->simultaneous_prefetches,
3824 global_options.x_param_values,
3825 global_options_set.x_param_values);
3826 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3827 ix86_tune_cost->prefetch_block,
3828 global_options.x_param_values,
3829 global_options_set.x_param_values);
3830 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3831 ix86_tune_cost->l1_cache_size,
3832 global_options.x_param_values,
3833 global_options_set.x_param_values);
3834 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
3835 ix86_tune_cost->l2_cache_size,
3836 global_options.x_param_values,
3837 global_options_set.x_param_values);
3839 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3840 if (flag_prefetch_loop_arrays < 0
3842 && (optimize >= 3 || flag_profile_use)
3843 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3844 flag_prefetch_loop_arrays = 1;
3846 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3847 can be optimized to ap = __builtin_next_arg (0). */
3848 if (!TARGET_64BIT && !flag_split_stack)
3849 targetm.expand_builtin_va_start = NULL;
3851 if (TARGET_64BIT)
3852 {
3853 ix86_gen_leave = gen_leave_rex64;
3854 if (Pmode == DImode)
3856 ix86_gen_monitor = gen_sse3_monitor64_di;
3857 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
3858 ix86_gen_tls_local_dynamic_base_64
3859 = gen_tls_local_dynamic_base_64_di;
3860 }
3861 else
3862 {
3863 ix86_gen_monitor = gen_sse3_monitor64_si;
3864 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
3865 ix86_gen_tls_local_dynamic_base_64
3866 = gen_tls_local_dynamic_base_64_si;
3867 }
3868 }
3869 else
3870 {
3871 ix86_gen_leave = gen_leave;
3872 ix86_gen_monitor = gen_sse3_monitor;
3875 if (Pmode == DImode)
3877 ix86_gen_add3 = gen_adddi3;
3878 ix86_gen_sub3 = gen_subdi3;
3879 ix86_gen_sub3_carry = gen_subdi3_carry;
3880 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3881 ix86_gen_andsp = gen_anddi3;
3882 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3883 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3884 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3885 }
3886 else
3887 {
3888 ix86_gen_add3 = gen_addsi3;
3889 ix86_gen_sub3 = gen_subsi3;
3890 ix86_gen_sub3_carry = gen_subsi3_carry;
3891 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3892 ix86_gen_andsp = gen_andsi3;
3893 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3894 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3895 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3899 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3900 if (!TARGET_64BIT)
3901 target_flags |= MASK_CLD & ~target_flags_explicit;
3904 if (!TARGET_64BIT && flag_pic)
3906 if (flag_fentry > 0)
3907 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3911 else if (TARGET_SEH)
3913 if (flag_fentry == 0)
3914 sorry ("-mno-fentry isn%'t compatible with SEH");
3917 else if (flag_fentry < 0)
3919 #if defined(PROFILE_BEFORE_PROLOGUE)
3920 flag_fentry = 1;
3921 #else
3922 flag_fentry = 0;
3923 #endif
3928 /* When not optimizing for size, enable the vzeroupper optimization for
3929 TARGET_AVX with -fexpensive-optimizations, and split 32-byte
3930 AVX unaligned loads/stores. */
3931 if (TARGET_AVX)
3932 {
3933 if (flag_expensive_optimizations
3934 && !(target_flags_explicit & MASK_VZEROUPPER))
3935 target_flags |= MASK_VZEROUPPER;
3936 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3937 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3938 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3939 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3940 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3941 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3942 /* Enable 128-bit AVX instruction generation
3943 for the auto-vectorizer. */
3944 if (TARGET_AVX128_OPTIMAL
3945 && !(target_flags_explicit & MASK_PREFER_AVX128))
3946 target_flags |= MASK_PREFER_AVX128;
3947 }
3949 else
3950 {
3951 /* Disable the vzeroupper pass if TARGET_AVX is disabled. */
3952 target_flags &= ~MASK_VZEROUPPER;
3953 }
3955 if (ix86_recip_name)
3957 char *p = ASTRDUP (ix86_recip_name);
3958 char *q;
3959 unsigned int mask, i;
3960 bool invert;
3962 while ((q = strtok (p, ",")) != NULL)
3963 {
3964 p = NULL;
3966 if (*q == '!')
3967 {
3968 invert = true;
3969 q++;
3970 }
3971 else
3972 invert = false;
3973 if (!strcmp (q, "default"))
3974 mask = RECIP_MASK_ALL;
3977 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
3978 if (!strcmp (q, recip_options[i].string))
3980 mask = recip_options[i].mask;
3981 break;
3982 }
3984 if (i == ARRAY_SIZE (recip_options))
3986 error ("unknown option for -mrecip=%s", q);
3988 mask = RECIP_MASK_NONE;
3992 recip_mask_explicit |= mask;
3994 recip_mask &= ~mask;
4001 recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
4002 else if (target_flags_explicit & MASK_RECIP)
4003 recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
4005 /* Default long double to 64-bit for Bionic. */
4006 if (TARGET_HAS_BIONIC
4007 && !(target_flags_explicit & MASK_LONG_DOUBLE_64))
4008 target_flags |= MASK_LONG_DOUBLE_64;
4010 /* Save the initial options in case the user does function specific
4011 options. */
4013 target_option_default_node = target_option_current_node
4014 = build_target_option_node ();
4017 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4019 static void
4020 ix86_option_override (void)
4022 static struct register_pass_info insert_vzeroupper_info
4023 = { &pass_insert_vzeroupper.pass, "reload",
4024 1, PASS_POS_INSERT_AFTER
4025 };
4027 ix86_option_override_internal (true);
4030 /* This needs to be done at start up. It's convenient to do it here. */
4031 register_pass (&insert_vzeroupper_info);
4034 /* Update register usage after having seen the compiler flags. */
4036 static void
4037 ix86_conditional_register_usage (void)
4042 /* The PIC register, if it exists, is fixed. */
4043 j = PIC_OFFSET_TABLE_REGNUM;
4044 if (j != INVALID_REGNUM)
4045 fixed_regs[j] = call_used_regs[j] = 1;
4047 /* For 32-bit targets, squash the REX registers. */
4050 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4051 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4052 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4053 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4056 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4057 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4058 : TARGET_64BIT ? (1 << 2)
4061 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4063 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4065 /* Set/reset conditionally defined registers from
4066 CALL_USED_REGISTERS initializer. */
4067 if (call_used_regs[i] > 1)
4068 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4070 /* Calculate registers of CLOBBERED_REGS register set
4071 as call used registers from GENERAL_REGS register set. */
4072 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4073 && call_used_regs[i])
4074 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4077 /* If MMX is disabled, squash the registers. */
4078 if (! TARGET_MMX)
4079 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4080 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4081 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4083 /* If SSE is disabled, squash the registers. */
4084 if (! TARGET_SSE)
4085 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4086 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4087 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4089 /* If the FPU is disabled, squash the registers. */
4090 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4091 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4092 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4093 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
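/* Worked example (illustrative, entry values made up): an entry of 6 in
   CALL_USED_REGISTERS has bits 1 and 2 set, so it stays call-used both
   for 32-bit code (c_mask == 1 << 1) and plain 64-bit code (c_mask ==
   1 << 2), but becomes call-saved under the 64-bit MS ABI (c_mask ==
   1 << 3).  */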
4097 /* Save the current options */
4099 static void
4100 ix86_function_specific_save (struct cl_target_option *ptr)
4102 ptr->arch = ix86_arch;
4103 ptr->schedule = ix86_schedule;
4104 ptr->tune = ix86_tune;
4105 ptr->branch_cost = ix86_branch_cost;
4106 ptr->tune_defaulted = ix86_tune_defaulted;
4107 ptr->arch_specified = ix86_arch_specified;
4108 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4109 ptr->ix86_target_flags_explicit = target_flags_explicit;
4110 ptr->x_recip_mask_explicit = recip_mask_explicit;
4112 /* The fields are char but the variables are not; make sure the
4113 values fit in the fields. */
4114 gcc_assert (ptr->arch == ix86_arch);
4115 gcc_assert (ptr->schedule == ix86_schedule);
4116 gcc_assert (ptr->tune == ix86_tune);
4117 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4120 /* Restore the current options */
4122 static void
4123 ix86_function_specific_restore (struct cl_target_option *ptr)
4125 enum processor_type old_tune = ix86_tune;
4126 enum processor_type old_arch = ix86_arch;
4127 unsigned int ix86_arch_mask, ix86_tune_mask;
4128 int i;
4130 ix86_arch = (enum processor_type) ptr->arch;
4131 ix86_schedule = (enum attr_cpu) ptr->schedule;
4132 ix86_tune = (enum processor_type) ptr->tune;
4133 ix86_branch_cost = ptr->branch_cost;
4134 ix86_tune_defaulted = ptr->tune_defaulted;
4135 ix86_arch_specified = ptr->arch_specified;
4136 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4137 target_flags_explicit = ptr->ix86_target_flags_explicit;
4138 recip_mask_explicit = ptr->x_recip_mask_explicit;
4140 /* Recreate the arch feature tests if the arch changed */
4141 if (old_arch != ix86_arch)
4143 ix86_arch_mask = 1u << ix86_arch;
4144 for (i = 0; i < X86_ARCH_LAST; ++i)
4145 ix86_arch_features[i]
4146 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4149 /* Recreate the tune optimization tests */
4150 if (old_tune != ix86_tune)
4152 ix86_tune_mask = 1u << ix86_tune;
4153 for (i = 0; i < X86_TUNE_LAST; ++i)
4154 ix86_tune_features[i]
4155 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4159 /* Print the current options */
4161 static void
4162 ix86_function_specific_print (FILE *file, int indent,
4163 struct cl_target_option *ptr)
4165 char *target_string
4166 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4167 NULL, NULL, ptr->x_ix86_fpmath, false);
4169 fprintf (file, "%*sarch = %d (%s)\n",
4172 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4173 ? cpu_names[ptr->arch]
4176 fprintf (file, "%*stune = %d (%s)\n",
4179 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4180 ? cpu_names[ptr->tune]
4183 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4187 fprintf (file, "%*s%s\n", indent, "", target_string);
4188 free (target_string);
4193 /* Inner function to process the attribute((target(...))), take an argument and
4194 set the current options from the argument. If we have a list, recursively go
4195 over the list. */
4197 static bool
4198 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4199 struct gcc_options *enum_opts_set)
4204 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4205 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4206 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4207 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4208 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4224 enum ix86_opt_type type;
4229 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4230 IX86_ATTR_ISA ("abm", OPT_mabm),
4231 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4232 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4233 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4234 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4235 IX86_ATTR_ISA ("aes", OPT_maes),
4236 IX86_ATTR_ISA ("avx", OPT_mavx),
4237 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4238 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4239 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4240 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4241 IX86_ATTR_ISA ("sse", OPT_msse),
4242 IX86_ATTR_ISA ("sse2", OPT_msse2),
4243 IX86_ATTR_ISA ("sse3", OPT_msse3),
4244 IX86_ATTR_ISA ("sse4", OPT_msse4),
4245 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4246 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4247 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4248 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4249 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4250 IX86_ATTR_ISA ("fma", OPT_mfma),
4251 IX86_ATTR_ISA ("xop", OPT_mxop),
4252 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4253 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4254 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4255 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4256 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4257 IX86_ATTR_ISA ("hle", OPT_mhle),
4258 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4259 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4260 IX86_ATTR_ISA ("adx", OPT_madx),
4261 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4262 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4263 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4266 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4268 /* string options */
4269 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4270 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4273 IX86_ATTR_YES ("cld",
4277 IX86_ATTR_NO ("fancy-math-387",
4278 OPT_mfancy_math_387,
4279 MASK_NO_FANCY_MATH_387),
4281 IX86_ATTR_YES ("ieee-fp",
4285 IX86_ATTR_YES ("inline-all-stringops",
4286 OPT_minline_all_stringops,
4287 MASK_INLINE_ALL_STRINGOPS),
4289 IX86_ATTR_YES ("inline-stringops-dynamically",
4290 OPT_minline_stringops_dynamically,
4291 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4293 IX86_ATTR_NO ("align-stringops",
4294 OPT_mno_align_stringops,
4295 MASK_NO_ALIGN_STRINGOPS),
4297 IX86_ATTR_YES ("recip",
4303 /* If this is a list, recurse to get the options. */
4304 if (TREE_CODE (args) == TREE_LIST)
4308 for (; args; args = TREE_CHAIN (args))
4309 if (TREE_VALUE (args)
4310 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4311 p_strings, enum_opts_set))
4317 else if (TREE_CODE (args) != STRING_CST)
4319 error ("attribute %<target%> argument not a string");
4323 /* Handle multiple arguments separated by commas. */
4324 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4326 while (next_optstr && *next_optstr != '\0')
4328 char *p = next_optstr;
4329 char *orig_p = p;
4330 char *comma = strchr (next_optstr, ',');
4331 const char *opt_string;
4332 size_t len, opt_len;
4337 enum ix86_opt_type type = ix86_opt_unknown;
4343 len = comma - next_optstr;
4344 next_optstr = comma + 1;
4352 /* Recognize no-xxx. */
4353 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4354 {
4355 opt_set_p = false;
4356 p += 3;
4357 len -= 3;
4358 }
4359 else
4360 opt_set_p = true;
4362 /* Find the option. */
4365 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4367 type = attrs[i].type;
4368 opt_len = attrs[i].len;
4369 if (ch == attrs[i].string[0]
4370 && ((type != ix86_opt_str && type != ix86_opt_enum)
4373 && memcmp (p, attrs[i].string, opt_len) == 0)
4376 mask = attrs[i].mask;
4377 opt_string = attrs[i].string;
4382 /* Process the option. */
4385 error ("attribute(target(\"%s\")) is unknown", orig_p);
4389 else if (type == ix86_opt_isa)
4391 struct cl_decoded_option decoded;
4393 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4394 ix86_handle_option (&global_options, &global_options_set,
4395 &decoded, input_location);
4398 else if (type == ix86_opt_yes || type == ix86_opt_no)
4400 if (type == ix86_opt_no)
4401 opt_set_p = !opt_set_p;
4403 if (opt_set_p)
4404 target_flags |= mask;
4405 else
4406 target_flags &= ~mask;
4409 else if (type == ix86_opt_str)
4413 error ("option(\"%s\") was already specified", opt_string);
4417 p_strings[opt] = xstrdup (p + opt_len);
4420 else if (type == ix86_opt_enum)
4425 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4427 set_option (&global_options, enum_opts_set, opt, value,
4428 p + opt_len, DK_UNSPECIFIED, input_location,
4432 error ("attribute(target(\"%s\")) is unknown", orig_p);
4444 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4446 static tree
4447 ix86_valid_target_attribute_tree (tree args)
4449 const char *orig_arch_string = ix86_arch_string;
4450 const char *orig_tune_string = ix86_tune_string;
4451 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4452 int orig_tune_defaulted = ix86_tune_defaulted;
4453 int orig_arch_specified = ix86_arch_specified;
4454 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4457 struct cl_target_option *def
4458 = TREE_TARGET_OPTION (target_option_default_node);
4459 struct gcc_options enum_opts_set;
4461 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4463 /* Process each of the options on the chain. */
4464 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4465 &enum_opts_set))
4466 return error_mark_node;
4468 /* If the changed options are different from the default, rerun
4469 ix86_option_override_internal, and then save the options away.
4470 The string options are attribute options, and will be undone
4471 when we copy the save structure. */
4472 if (ix86_isa_flags != def->x_ix86_isa_flags
4473 || target_flags != def->x_target_flags
4474 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4475 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4476 || enum_opts_set.x_ix86_fpmath)
4478 /* If we are using the default tune= or arch=, undo the string assigned,
4479 and use the default. */
4480 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4481 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4482 else if (!orig_arch_specified)
4483 ix86_arch_string = NULL;
4485 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4486 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4487 else if (orig_tune_defaulted)
4488 ix86_tune_string = NULL;
4490 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4491 if (enum_opts_set.x_ix86_fpmath)
4492 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4493 else if (!TARGET_64BIT && TARGET_SSE)
4495 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4496 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4499 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4500 ix86_option_override_internal (false);
4502 /* Add any builtin functions with the new isa if any. */
4503 ix86_add_new_builtins (ix86_isa_flags);
4505 /* Save the current options unless we are validating options for
4506 #pragma. */
4507 t = build_target_option_node ();
4509 ix86_arch_string = orig_arch_string;
4510 ix86_tune_string = orig_tune_string;
4511 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4513 /* Free up memory allocated to hold the strings */
4514 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4515 free (option_strings[i]);
4521 /* Hook to validate attribute((target("string"))). */
4523 static bool
4524 ix86_valid_target_attribute_p (tree fndecl,
4525 tree ARG_UNUSED (name),
4526 tree args,
4527 int ARG_UNUSED (flags))
4529 struct cl_target_option cur_target;
4530 bool ret = true;
4532 /* attribute((target("default"))) does nothing, beyond
4533 affecting multi-versioning. */
4534 if (TREE_VALUE (args)
4535 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4536 && TREE_CHAIN (args) == NULL_TREE
4537 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4540 tree old_optimize = build_optimization_node ();
4541 tree new_target, new_optimize;
4542 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4544 /* If the function changed the optimization levels as well as setting target
4545 options, start with the optimizations specified. */
4546 if (func_optimize && func_optimize != old_optimize)
4547 cl_optimization_restore (&global_options,
4548 TREE_OPTIMIZATION (func_optimize));
4550 /* The target attributes may also change some optimization flags, so update
4551 the optimization options if necessary. */
4552 cl_target_option_save (&cur_target, &global_options);
4553 new_target = ix86_valid_target_attribute_tree (args);
4554 new_optimize = build_optimization_node ();
4556 if (new_target == error_mark_node)
4557 ret = false;
4559 else if (fndecl && new_target)
4561 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4563 if (old_optimize != new_optimize)
4564 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4567 cl_target_option_restore (&global_options, &cur_target);
4569 if (old_optimize != new_optimize)
4570 cl_optimization_restore (&global_options,
4571 TREE_OPTIMIZATION (old_optimize));
4573 return ret;
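/* Usage example (illustrative, standard attribute syntax):

     int popcount_generic (unsigned int x);
     __attribute__((target("popcnt")))
     int popcount_fast (unsigned int x);

   The second declaration is validated by this hook; its string is
   parsed like -mpopcnt on the command line, and the resulting options
   are stored in DECL_FUNCTION_SPECIFIC_TARGET of the decl.  */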
4577 /* Hook to determine if one function can safely inline another. */
4579 static bool
4580 ix86_can_inline_p (tree caller, tree callee)
4581 {
4582 bool ret = false;
4583 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4584 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4586 /* If the callee has no option attributes, then it is ok to inline. */
4587 if (!callee_tree)
4588 ret = true;
4590 /* If the caller has no option attributes, but the callee does, then it is
4591 not ok to inline. */
4592 else if (!caller_tree)
4593 ret = false;
4597 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4598 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4600 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
4601 function can inline an SSE2 function, but an SSE2 function can't inline
4602 an SSE4 function. */
4603 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4604 != callee_opts->x_ix86_isa_flags)
4605 ret = false;
4607 /* See if we have the same non-isa options. */
4608 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4609 ret = false;
4611 /* See if arch, tune, etc. are the same. */
4612 else if (caller_opts->arch != callee_opts->arch)
4613 ret = false;
4615 else if (caller_opts->tune != callee_opts->tune)
4616 ret = false;
4618 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4619 ret = false;
4621 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4622 ret = false;
4624 else
4625 ret = true;
4627 return ret;
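/* Example (illustrative): a caller built with -msse4.2 may inline a
   callee declared __attribute__((target("sse2"))), because SSE2 is a
   subset of the caller's ISA flags; the reverse direction fails the
   subset test above.  */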
4632 /* Remember the last target of ix86_set_current_function. */
4633 static GTY(()) tree ix86_previous_fndecl;
4635 /* Establish appropriate back-end context for processing the function
4636 FNDECL. The argument might be NULL to indicate processing at top
4637 level, outside of any function scope. */
4639 ix86_set_current_function (tree fndecl)
4641 /* Only change the context if the function changes. This hook is called
4642 several times in the course of compiling a function, and we don't want to
4643 slow things down too much or call target_reinit when it isn't safe. */
4644 if (fndecl && fndecl != ix86_previous_fndecl)
4646 tree old_tree = (ix86_previous_fndecl
4647 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4650 tree new_tree = (fndecl
4651 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4654 ix86_previous_fndecl = fndecl;
4655 if (old_tree == new_tree)
4660 cl_target_option_restore (&global_options,
4661 TREE_TARGET_OPTION (new_tree));
4662 target_reinit ();
4663 }
4665 else if (old_tree)
4666 {
4667 struct cl_target_option *def
4668 = TREE_TARGET_OPTION (target_option_current_node);
4670 cl_target_option_restore (&global_options, def);
4677 /* Return true if this goes in large data/bss. */
4679 static bool
4680 ix86_in_large_data_p (tree exp)
4682 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4683 return false;
4685 /* Functions are never large data. */
4686 if (TREE_CODE (exp) == FUNCTION_DECL)
4687 return false;
4689 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4691 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4692 if (strcmp (section, ".ldata") == 0
4693 || strcmp (section, ".lbss") == 0)
4699 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4701 /* If this is an incomplete type with size 0, then we can't put it
4702 in data because it might be too big when completed. */
4703 if (!size || size > ix86_section_threshold)
4704 return true;
4707 return false;
4710 /* Switch to the appropriate section for output of DECL.
4711 DECL is either a `VAR_DECL' node or a constant of some sort.
4712 RELOC indicates whether forming the initial value of DECL requires
4713 link-time relocations. */
4715 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4716 ATTRIBUTE_UNUSED;
4718 static section *
4719 x86_64_elf_select_section (tree decl, int reloc,
4720 unsigned HOST_WIDE_INT align)
4722 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4723 && ix86_in_large_data_p (decl))
4725 const char *sname = NULL;
4726 unsigned int flags = SECTION_WRITE;
4727 switch (categorize_decl_for_section (decl, reloc))
4728 {
4729 case SECCAT_DATA:
4730 sname = ".ldata";
4731 break;
4732 case SECCAT_DATA_REL:
4733 sname = ".ldata.rel";
4734 break;
4735 case SECCAT_DATA_REL_LOCAL:
4736 sname = ".ldata.rel.local";
4737 break;
4738 case SECCAT_DATA_REL_RO:
4739 sname = ".ldata.rel.ro";
4740 break;
4741 case SECCAT_DATA_REL_RO_LOCAL:
4742 sname = ".ldata.rel.ro.local";
4743 break;
4744 case SECCAT_BSS:
4745 sname = ".lbss";
4746 flags |= SECTION_BSS;
4747 break;
4748 case SECCAT_RODATA:
4749 case SECCAT_RODATA_MERGE_STR:
4750 case SECCAT_RODATA_MERGE_STR_INIT:
4751 case SECCAT_RODATA_MERGE_CONST:
4752 sname = ".lrodata";
4753 flags = 0;
4754 break;
4755 case SECCAT_SRODATA:
4756 case SECCAT_SDATA:
4757 case SECCAT_SBSS:
4758 gcc_unreachable ();
4759 case SECCAT_TEXT:
4760 case SECCAT_TDATA:
4761 case SECCAT_TBSS:
4762 /* We don't split these for the medium model. Place them into
4763 default sections and hope for the best. */
4764 break;
4765 }
4768 /* We might get called with string constants, but get_named_section
4769 doesn't like them as they are not DECLs. Also, we need to set
4770 flags in that case. */
4771 if (!DECL_P (decl))
4772 return get_section (sname, flags, NULL);
4773 return get_named_section (decl, sname, reloc);
4776 return default_elf_select_section (decl, reloc, align);
4779 /* Build up a unique section name, expressed as a
4780 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4781 RELOC indicates whether the initial value of EXP requires
4782 link-time relocations. */
4784 static void ATTRIBUTE_UNUSED
4785 x86_64_elf_unique_section (tree decl, int reloc)
4787 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4788 && ix86_in_large_data_p (decl))
4790 const char *prefix = NULL;
4791 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4792 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4794 switch (categorize_decl_for_section (decl, reloc))
4797 case SECCAT_DATA_REL:
4798 case SECCAT_DATA_REL_LOCAL:
4799 case SECCAT_DATA_REL_RO:
4800 case SECCAT_DATA_REL_RO_LOCAL:
4801 prefix = one_only ? ".ld" : ".ldata";
4804 prefix = one_only ? ".lb" : ".lbss";
4807 case SECCAT_RODATA_MERGE_STR:
4808 case SECCAT_RODATA_MERGE_STR_INIT:
4809 case SECCAT_RODATA_MERGE_CONST:
4810 prefix = one_only ? ".lr" : ".lrodata";
4812 case SECCAT_SRODATA:
4819 /* We don't split these for the medium model. Place them into
4820 default sections and hope for the best. */
4821 break;
4822 }
4823 if (prefix)
4824 {
4825 const char *name, *linkonce;
4826 char *string;
4828 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4829 name = targetm.strip_name_encoding (name);
4831 /* If we're using one_only, then there needs to be a .gnu.linkonce
4832 prefix to the section name. */
4833 linkonce = one_only ? ".gnu.linkonce" : "";
4835 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4837 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4841 default_unique_section (decl, reloc);
4844 #ifdef COMMON_ASM_OP
4845 /* This says how to output assembler code to declare an
4846 uninitialized external linkage data object.
4848 For medium model x86-64 we need to use the .largecomm directive for
4849 large objects. */
4850 void
4851 x86_elf_aligned_common (FILE *file,
4852 const char *name, unsigned HOST_WIDE_INT size,
4853 int align)
4855 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4856 && size > (unsigned int)ix86_section_threshold)
4857 fputs (".largecomm\t", file);
4858 else
4859 fputs (COMMON_ASM_OP, file);
4860 assemble_name (file, name);
4861 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4862 size, align / BITS_PER_UNIT);
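/* Illustrative output (values made up): for a 100000-byte object "big"
   with 32-byte alignment under -mcmodel=medium and the default
   -mlarge-data-threshold, the code above emits

       .largecomm	big,100000,32

   whereas small objects fall back to the ordinary COMMON_ASM_OP form.  */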
4866 /* Utility function for targets to use in implementing
4867 ASM_OUTPUT_ALIGNED_BSS. */
4869 void
4870 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4871 const char *name, unsigned HOST_WIDE_INT size,
4872 int align)
4874 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4875 && size > (unsigned int)ix86_section_threshold)
4876 switch_to_section (get_named_section (decl, ".lbss", 0));
4878 switch_to_section (bss_section);
4879 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4880 #ifdef ASM_DECLARE_OBJECT_NAME
4881 last_assemble_variable_decl = decl;
4882 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4883 #else
4884 /* The standard thing is just to output a label for the object. */
4885 ASM_OUTPUT_LABEL (file, name);
4886 #endif /* ASM_DECLARE_OBJECT_NAME */
4887 ASM_OUTPUT_SKIP (file, size ? size : 1);
4890 /* Decide whether we must probe the stack before any space allocation
4891 on this target. It's essentially TARGET_STACK_PROBE except when
4892 -fstack-check causes the stack to be already probed differently. */
4894 static bool
4895 ix86_target_stack_probe (void)
4897 /* Do not probe the stack twice if static stack checking is enabled. */
4898 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4899 return false;
4901 return TARGET_STACK_PROBE;
4904 /* Decide whether we can make a sibling call to a function. DECL is the
4905 declaration of the function being targeted by the call and EXP is the
4906 CALL_EXPR representing the call. */
4908 static bool
4909 ix86_function_ok_for_sibcall (tree decl, tree exp)
4911 tree type, decl_or_type;
4912 rtx a, b;
4914 /* If we are generating position-independent code, we cannot sibcall
4915 optimize any indirect call, or a direct call to a global function,
4916 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4920 && (!decl || !targetm.binds_local_p (decl)))
4923 /* If we need to align the outgoing stack, then sibcalling would
4924 unalign the stack, which may break the called function. */
4925 if (ix86_minimum_incoming_stack_boundary (true)
4926 < PREFERRED_STACK_BOUNDARY)
4931 decl_or_type = decl;
4932 type = TREE_TYPE (decl);
4933 }
4934 else
4935 {
4936 /* We're looking at the CALL_EXPR; we need the type of the function. */
4937 type = CALL_EXPR_FN (exp); /* pointer expression */
4938 type = TREE_TYPE (type); /* pointer type */
4939 type = TREE_TYPE (type); /* function type */
4940 decl_or_type = type;
4943 /* Check that the return value locations are the same. Like
4944 if we are returning floats on the 80387 register stack, we cannot
4945 make a sibcall from a function that doesn't return a float to a
4946 function that does or, conversely, from a function that does return
4947 a float to a function that doesn't; the necessary stack adjustment
4948 would not be executed. This is also the place we notice
4949 differences in the return value ABI. Note that it is ok for one
4950 of the functions to have void return type as long as the return
4951 value of the other is passed in a register. */
4952 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4953 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4954 cfun->decl, false);
4955 if (STACK_REG_P (a) || STACK_REG_P (b))
4957 if (!rtx_equal_p (a, b))
4958 return false;
4960 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4961 ;
4962 else if (!rtx_equal_p (a, b))
4963 return false;
4967 /* The SYSV ABI has more call-clobbered registers;
4968 disallow sibcalls from MS to SYSV. */
4969 if (cfun->machine->call_abi == MS_ABI
4970 && ix86_function_type_abi (type) == SYSV_ABI)
4975 /* If this call is indirect, we'll need to be able to use a
4976 call-clobbered register for the address of the target function.
4977 Make sure that all such registers are not used for passing
4978 parameters. Note that DLLIMPORT functions are indirect. */
4979 if (!decl
4980 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4982 if (ix86_function_regparm (type, NULL) >= 3)
4984 /* ??? Need to count the actual number of registers to be used,
4985 not the possible number of registers. Fix later. */
4986 return false;
4991 /* Otherwise okay. That also includes certain types of indirect calls. */
4992 return true;
4995 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4996 and "sseregparm" calling convention attributes;
4997 arguments as in struct attribute_spec.handler. */
4999 static tree
5000 ix86_handle_cconv_attribute (tree *node, tree name,
5001 tree args,
5002 int flags ATTRIBUTE_UNUSED,
5003 bool *no_add_attrs)
5004 {
5005 if (TREE_CODE (*node) != FUNCTION_TYPE
5006 && TREE_CODE (*node) != METHOD_TYPE
5007 && TREE_CODE (*node) != FIELD_DECL
5008 && TREE_CODE (*node) != TYPE_DECL)
5010 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5011 name);
5012 *no_add_attrs = true;
5013 return NULL_TREE;
5016 /* Can combine regparm with all attributes but fastcall and thiscall. */
5017 if (is_attribute_p ("regparm", name))
5021 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5023 error ("fastcall and regparm attributes are not compatible");
5026 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5028 error ("regparam and thiscall attributes are not compatible");
5031 cst = TREE_VALUE (args);
5032 if (TREE_CODE (cst) != INTEGER_CST)
5034 warning (OPT_Wattributes,
5035 "%qE attribute requires an integer constant argument",
5037 *no_add_attrs = true;
5039 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5041 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5042 name, REGPARM_MAX);
5043 *no_add_attrs = true;
5046 return NULL_TREE;
5049 if (TARGET_64BIT)
5050 {
5051 /* Do not warn when emulating the MS ABI. */
5052 if ((TREE_CODE (*node) != FUNCTION_TYPE
5053 && TREE_CODE (*node) != METHOD_TYPE)
5054 || ix86_function_type_abi (*node) != MS_ABI)
5055 warning (OPT_Wattributes, "%qE attribute ignored",
5056 name);
5057 *no_add_attrs = true;
5058 return NULL_TREE;
5061 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5062 if (is_attribute_p ("fastcall", name))
5064 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5066 error ("fastcall and cdecl attributes are not compatible");
5068 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5070 error ("fastcall and stdcall attributes are not compatible");
5072 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5074 error ("fastcall and regparm attributes are not compatible");
5076 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5078 error ("fastcall and thiscall attributes are not compatible");
5082 /* Can combine stdcall with fastcall (redundant), regparm and
5084 else if (is_attribute_p ("stdcall", name))
5086 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5088 error ("stdcall and cdecl attributes are not compatible");
5090 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5092 error ("stdcall and fastcall attributes are not compatible");
5094 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5096 error ("stdcall and thiscall attributes are not compatible");
5100 /* Can combine cdecl with regparm and sseregparm. */
5101 else if (is_attribute_p ("cdecl", name))
5103 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5105 error ("stdcall and cdecl attributes are not compatible");
5107 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5109 error ("fastcall and cdecl attributes are not compatible");
5111 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5113 error ("cdecl and thiscall attributes are not compatible");
5116 else if (is_attribute_p ("thiscall", name))
5118 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5119 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
5121 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5123 error ("stdcall and thiscall attributes are not compatible");
5125 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5127 error ("fastcall and thiscall attributes are not compatible");
5129 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5131 error ("cdecl and thiscall attributes are not compatible");
5135 /* Can combine sseregparm with all attributes. */
5140 /* The transactional memory builtins are implicitly regparm or fastcall
5141 depending on the ABI. Override the generic do-nothing attribute that
5142 these builtins were declared with, and replace it with one of the two
5143 attributes that we expect elsewhere. */
5146 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5147 tree args ATTRIBUTE_UNUSED,
5148 int flags ATTRIBUTE_UNUSED,
5153 /* In no case do we want to add the placeholder attribute. */
5154 *no_add_attrs = true;
5156 /* The 64-bit ABI is unchanged for transactional memory. */
5160 /* ??? Is there a better way to validate 32-bit Windows? We have
5161 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5162 if (CHECK_STACK_LIMIT > 0)
5163 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5166 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5167 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5169 decl_attributes (node, alt, flags);
5174 /* This function determines the calling convention from TYPE. */
5177 ix86_get_callcvt (const_tree type)
5179 unsigned int ret = 0;
5184 return IX86_CALLCVT_CDECL;
5186 attrs = TYPE_ATTRIBUTES (type);
5187 if (attrs != NULL_TREE)
5189 if (lookup_attribute ("cdecl", attrs))
5190 ret |= IX86_CALLCVT_CDECL;
5191 else if (lookup_attribute ("stdcall", attrs))
5192 ret |= IX86_CALLCVT_STDCALL;
5193 else if (lookup_attribute ("fastcall", attrs))
5194 ret |= IX86_CALLCVT_FASTCALL;
5195 else if (lookup_attribute ("thiscall", attrs))
5196 ret |= IX86_CALLCVT_THISCALL;
5198 /* Regparm isn't allowed for thiscall and fastcall. */
5199 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5201 if (lookup_attribute ("regparm", attrs))
5202 ret |= IX86_CALLCVT_REGPARM;
5203 if (lookup_attribute ("sseregparm", attrs))
5204 ret |= IX86_CALLCVT_SSEREGPARM;
5207 if (IX86_BASE_CALLCVT(ret) != 0)
5211 is_stdarg = stdarg_p (type);
5212 if (TARGET_RTD && !is_stdarg)
5213 return IX86_CALLCVT_STDCALL | ret;
5217 || TREE_CODE (type) != METHOD_TYPE
5218 || ix86_function_type_abi (type) != MS_ABI)
5219 return IX86_CALLCVT_CDECL | ret;
5221 return IX86_CALLCVT_THISCALL;
5224 /* Return 0 if the attributes for two types are incompatible, 1 if they
5225 are compatible, and 2 if they are nearly compatible (which causes a
5226 warning to be generated). */
5229 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5231 unsigned int ccvt1, ccvt2;
5233 if (TREE_CODE (type1) != FUNCTION_TYPE
5234 && TREE_CODE (type1) != METHOD_TYPE)
5237 ccvt1 = ix86_get_callcvt (type1);
5238 ccvt2 = ix86_get_callcvt (type2);
5241 if (ix86_function_regparm (type1, NULL)
5242 != ix86_function_regparm (type2, NULL))
5248 /* Return the regparm value for a function with the indicated TYPE and DECL.
5249 DECL may be NULL when calling function indirectly
5250 or considering a libcall. */
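/* E.g. (illustrative user code): given

     void __attribute__ ((regparm (2))) f (int a, int b);

   this returns 2, so A and B are passed in %eax and %edx instead of
   on the stack.  */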
5253 ix86_function_regparm (const_tree type, const_tree decl)
5260 return (ix86_function_type_abi (type) == SYSV_ABI
5261 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5262 ccvt = ix86_get_callcvt (type);
5263 regparm = ix86_regparm;
5265 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5267 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5270 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5274 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5276 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5279 /* Use register calling convention for local functions when possible. */
5281 && TREE_CODE (decl) == FUNCTION_DECL
5283 && !(profile_flag && !flag_fentry))
5285 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5286 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5287 if (i && i->local && i->can_change_signature)
5289 int local_regparm, globals = 0, regno;
5291 /* Make sure no regparm register is taken by a
5292 fixed register variable. */
5293 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5294 if (fixed_regs[local_regparm])
5297 /* We don't want to use regparm(3) for nested functions as
5298 these use a static chain pointer in the third argument. */
5299 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5302 /* In 32-bit mode save a register for the split stack. */
5303 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5306 /* Each fixed register usage increases register pressure,
5307 so fewer registers should be used for argument passing.
5308 This functionality can be overridden by an explicit
5310 for (regno = AX_REG; regno <= DI_REG; regno++)
5311 if (fixed_regs[regno])
5315 = globals < local_regparm ? local_regparm - globals : 0;
5317 if (local_regparm > regparm)
5318 regparm = local_regparm;
5325 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5326 DFmode (2) arguments in SSE registers for a function with the
5327 indicated TYPE and DECL. DECL may be NULL when calling function
5328 indirectly or considering a libcall. Otherwise return 0. */
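/* E.g. (illustrative user code): given

     float __attribute__ ((sseregparm)) f (float x);

   compiled with SSE enabled, this returns nonzero and X is passed in
   an SSE register (%xmm0 for the first such argument) rather than on
   the stack; with SSE2 the return value is 2 and doubles qualify too.  */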
5331 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5333 gcc_assert (!TARGET_64BIT);
5335 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5336 by the sseregparm attribute. */
5337 if (TARGET_SSEREGPARM
5338 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5345 error ("calling %qD with attribute sseregparm without "
5346 "SSE/SSE2 enabled", decl);
5348 error ("calling %qT with attribute sseregparm without "
5349 "SSE/SSE2 enabled", type);
5357 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5358 (and DFmode for SSE2) arguments in SSE registers. */
5359 if (decl && TARGET_SSE_MATH && optimize
5360 && !(profile_flag && !flag_fentry))
5362 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5363 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5364 if (i && i->local && i->can_change_signature)
5365 return TARGET_SSE2 ? 2 : 1;
5371 /* Return true if EAX is live at the start of the function. Used by
5372 ix86_expand_prologue to determine if we need special help before
5373 calling allocate_stack_worker. */
5376 ix86_eax_live_at_start_p (void)
5378 /* Cheat. Don't bother working forward from ix86_function_regparm
5379 to the function type to whether an actual argument is located in
5380 eax. Instead just look at cfg info, which is still close enough
5381 to correct at this point. This gives false positives for broken
5382 functions that might use uninitialized data that happens to be
5383 allocated in eax, but who cares? */
5384 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5388 ix86_keep_aggregate_return_pointer (tree fntype)
5394 attr = lookup_attribute ("callee_pop_aggregate_return",
5395 TYPE_ATTRIBUTES (fntype));
5397 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5399 /* For 32-bit MS-ABI the default is to keep aggregate
5401 if (ix86_function_type_abi (fntype) == MS_ABI)
5404 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5407 /* Value is the number of bytes of arguments automatically
5408 popped when returning from a subroutine call.
5409 FUNDECL is the declaration node of the function (as a tree),
5410 FUNTYPE is the data type of the function (as a tree),
5411 or for a library call it is an identifier node for the subroutine name.
5412 SIZE is the number of bytes of arguments passed on the stack.
5414 On the 80386, the RTD insn may be used to pop them if the number
5415 of args is fixed, but if the number is variable then the caller
5416 must pop them all. RTD can't be used for library calls now
5417 because the library is compiled with the Unix compiler.
5418 Use of RTD is a selectable option, since it is incompatible with
5419 standard Unix calling sequences. If the option is not selected,
5420 the caller must always pop the args.
5422 The attribute stdcall is equivalent to RTD on a per module basis. */
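/* E.g. (illustrative): for

     void __attribute__ ((stdcall)) f (int a, int b);

   this returns 8 and the callee pops both arguments with "ret $8",
   whereas for plain cdecl it returns 0 and the caller pops.  */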
5425 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5429 /* None of the 64-bit ABIs pop arguments. */
5433 ccvt = ix86_get_callcvt (funtype);
5435 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5436 | IX86_CALLCVT_THISCALL)) != 0
5437 && ! stdarg_p (funtype))
5440 /* Lose any fake structure return argument if it is passed on the stack. */
5441 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5442 && !ix86_keep_aggregate_return_pointer (funtype))
5444 int nregs = ix86_function_regparm (funtype, fundecl);
5446 return GET_MODE_SIZE (Pmode);
5452 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5455 ix86_legitimate_combined_insn (rtx insn)
5457 /* Check operand constraints in case hard registers were propagated
5458 into the insn pattern. This check prevents the combine pass from
5459 generating insn patterns with invalid hard register operands.
5460 These invalid insns can eventually confuse reload to error out
5461 with a spill failure. See also PRs 46829 and 46843. */
5462 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5466 extract_insn (insn);
5467 preprocess_constraints ();
5469 for (i = 0; i < recog_data.n_operands; i++)
5471 rtx op = recog_data.operand[i];
5472 enum machine_mode mode = GET_MODE (op);
5473 struct operand_alternative *op_alt;
5478 /* A unary operator may be accepted by the predicate, but it
5479 is irrelevant for matching constraints. */
5483 if (GET_CODE (op) == SUBREG)
5485 if (REG_P (SUBREG_REG (op))
5486 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5487 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5488 GET_MODE (SUBREG_REG (op)),
5491 op = SUBREG_REG (op);
5494 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5497 op_alt = recog_op_alt[i];
5499 /* Operand has no constraints, anything is OK. */
5500 win = !recog_data.n_alternatives;
5502 for (j = 0; j < recog_data.n_alternatives; j++)
5504 if (op_alt[j].anything_ok
5505 || (op_alt[j].matches != -1
5507 (recog_data.operand[i],
5508 recog_data.operand[op_alt[j].matches]))
5509 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5524 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5526 static unsigned HOST_WIDE_INT
5527 ix86_asan_shadow_offset (void)
5529 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5530 : HOST_WIDE_INT_C (0x7fff8000))
5531 : (HOST_WIDE_INT_1 << 29);
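/* For reference, a sketch of the address-to-shadow mapping this
   offset feeds (using the x86_64 Linux value above and ASan's shift
   of 3):

     shadow = (addr >> 3) + 0x7fff8000;

   the actual instrumentation is emitted elsewhere by the sanitizer.  */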
5534 /* Argument support functions. */
5536 /* Return true when register may be used to pass function parameters. */
5538 ix86_function_arg_regno_p (int regno)
5541 const int *parm_regs;
5546 return (regno < REGPARM_MAX
5547 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5549 return (regno < REGPARM_MAX
5550 || (TARGET_MMX && MMX_REGNO_P (regno)
5551 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5552 || (TARGET_SSE && SSE_REGNO_P (regno)
5553 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5558 if (SSE_REGNO_P (regno) && TARGET_SSE)
5563 if (TARGET_SSE && SSE_REGNO_P (regno)
5564 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5568 /* TODO: The function should depend on current function ABI but
5569 builtins.c would need updating then. Therefore we use the
5572 /* RAX is used as a hidden argument to va_arg functions. */
5573 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5576 if (ix86_abi == MS_ABI)
5577 parm_regs = x86_64_ms_abi_int_parameter_registers;
5579 parm_regs = x86_64_int_parameter_registers;
5580 for (i = 0; i < (ix86_abi == MS_ABI
5581 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5582 if (regno == parm_regs[i])
5587 /* Return true if we do not know how to pass TYPE solely in registers. */
5590 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5592 if (must_pass_in_stack_var_size_or_pad (mode, type))
5595 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5596 The layout_type routine is crafty and tries to trick us into passing
5597 currently unsupported vector types on the stack by using TImode. */
5598 return (!TARGET_64BIT && mode == TImode
5599 && type && TREE_CODE (type) != VECTOR_TYPE);
5602 /* Return the size, in bytes, of the area reserved for arguments passed
5603 in registers for the function represented by FNDECL, depending on the used
5606 ix86_reg_parm_stack_space (const_tree fndecl)
5608 enum calling_abi call_abi = SYSV_ABI;
5609 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5610 call_abi = ix86_function_abi (fndecl);
5612 call_abi = ix86_function_type_abi (fndecl);
5613 if (TARGET_64BIT && call_abi == MS_ABI)
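/* (When the branch above is taken, the 64-bit MS ABI reserves a
   32-byte "home area" for the four register parameters; e.g. even a
   call to f (void) has those 32 bytes allocated by the caller.)  */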
5618 /* Returns SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5621 ix86_function_type_abi (const_tree fntype)
5623 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5625 enum calling_abi abi = ix86_abi;
5626 if (abi == SYSV_ABI)
5628 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5631 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5639 ix86_function_ms_hook_prologue (const_tree fn)
5641 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5643 if (decl_function_context (fn) != NULL_TREE)
5644 error_at (DECL_SOURCE_LOCATION (fn),
5645 "ms_hook_prologue is not compatible with nested function");
5652 static enum calling_abi
5653 ix86_function_abi (const_tree fndecl)
5657 return ix86_function_type_abi (TREE_TYPE (fndecl));
5660 /* Returns SYSV_ABI or MS_ABI, depending on cfun, specifying the
5663 ix86_cfun_abi (void)
5667 return cfun->machine->call_abi;
5670 /* Write the extra assembler code needed to declare a function properly. */
5673 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5676 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5680 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5681 unsigned int filler_cc = 0xcccccccc;
5683 for (i = 0; i < filler_count; i += 4)
5684 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5687 #ifdef SUBTARGET_ASM_UNWIND_INIT
5688 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5691 ASM_OUTPUT_LABEL (asm_out_file, fname);
5693 /* Output magic byte marker, if hot-patch attribute is set. */
5698 /* leaq [%rsp + 0], %rsp */
5699 asm_fprintf (asm_out_file, ASM_BYTE
5700 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5704 /* movl.s %edi, %edi
5706 movl.s %esp, %ebp */
5707 asm_fprintf (asm_out_file, ASM_BYTE
5708 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5714 extern void init_regs (void);
5716 /* Implementation of the call ABI switching target hook. Sets the
5717 call register sets specific to FNDECL. See also
5718 ix86_conditional_register_usage for more details. */
5720 ix86_call_abi_override (const_tree fndecl)
5722 if (fndecl == NULL_TREE)
5723 cfun->machine->call_abi = ix86_abi;
5725 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5728 /* 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
5729 expensive re-initialization of init_regs each time we switch function context
5730 since this is needed only during RTL expansion. */
5732 ix86_maybe_switch_abi (void)
5735 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5739 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5740 for a call to a function whose data type is FNTYPE.
5741 For a library call, FNTYPE is 0. */
5744 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5745 tree fntype, /* tree ptr for function decl */
5746 rtx libname, /* SYMBOL_REF of library name or 0 */
5750 struct cgraph_local_info *i;
5752 memset (cum, 0, sizeof (*cum));
5756 i = cgraph_local_info (fndecl);
5757 cum->call_abi = ix86_function_abi (fndecl);
5762 cum->call_abi = ix86_function_type_abi (fntype);
5765 cum->caller = caller;
5767 /* Set up the number of registers to use for passing arguments. */
5769 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5770 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5771 "or subtarget optimization implying it");
5772 cum->nregs = ix86_regparm;
5775 cum->nregs = (cum->call_abi == SYSV_ABI
5776 ? X86_64_REGPARM_MAX
5777 : X86_64_MS_REGPARM_MAX);
5781 cum->sse_nregs = SSE_REGPARM_MAX;
5784 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5785 ? X86_64_SSE_REGPARM_MAX
5786 : X86_64_MS_SSE_REGPARM_MAX);
5790 cum->mmx_nregs = MMX_REGPARM_MAX;
5791 cum->warn_avx = true;
5792 cum->warn_sse = true;
5793 cum->warn_mmx = true;
5795 /* Because the type might mismatch between caller and callee, we need to
5796 use the actual type of the function for local calls.
5797 FIXME: cgraph_analyze can be told to actually record if function uses
5798 va_start so for local functions maybe_vaarg can be made aggressive
5800 FIXME: once the type system is fixed, we won't need this code anymore. */
5801 if (i && i->local && i->can_change_signature)
5802 fntype = TREE_TYPE (fndecl);
5803 cum->maybe_vaarg = (fntype
5804 ? (!prototype_p (fntype) || stdarg_p (fntype))
5809 /* If there are variable arguments, then we won't pass anything
5810 in registers in 32-bit mode. */
5811 if (stdarg_p (fntype))
5822 /* Use ecx and edx registers if function has fastcall attribute,
5823 else look for regparm information. */
5826 unsigned int ccvt = ix86_get_callcvt (fntype);
5827 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5830 cum->fastcall = 1; /* Same first register as in fastcall. */
5832 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5838 cum->nregs = ix86_function_regparm (fntype, fndecl);
5841 /* Set up the number of SSE registers used for passing SFmode
5842 and DFmode arguments. Warn for mismatching ABI. */
5843 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5847 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5848 But in the case of vector types, it is some vector mode.
5850 When we have only some of our vector isa extensions enabled, then there
5851 are some modes for which vector_mode_supported_p is false. For these
5852 modes, the generic vector support in gcc will choose some non-vector mode
5853 in order to implement the type. By computing the natural mode, we'll
5854 select the proper ABI location for the operand and not depend on whatever
5855 the middle-end decides to do with these vector types.
5857 The middle-end can't deal with vector types > 16 bytes. In this
5858 case, we return the original mode and warn ABI change if CUM isn't
5861 static enum machine_mode
5862 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5864 enum machine_mode mode = TYPE_MODE (type);
5866 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5868 HOST_WIDE_INT size = int_size_in_bytes (type);
5869 if ((size == 8 || size == 16 || size == 32)
5870 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5871 && TYPE_VECTOR_SUBPARTS (type) > 1)
5873 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5875 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5876 mode = MIN_MODE_VECTOR_FLOAT;
5878 mode = MIN_MODE_VECTOR_INT;
5880 /* Get the mode which has this inner mode and number of units. */
5881 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5882 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5883 && GET_MODE_INNER (mode) == innermode)
5885 if (size == 32 && !TARGET_AVX)
5887 static bool warnedavx;
5894 warning (0, "AVX vector argument without AVX "
5895 "enabled changes the ABI");
5897 return TYPE_MODE (type);
5899 else if ((size == 8 || size == 16) && !TARGET_SSE)
5901 static bool warnedsse;
5908 warning (0, "SSE vector argument without SSE "
5909 "enabled changes the ABI");
5924 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5925 this may not agree with the mode that the type system has chosen for the
5926 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5927 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5930 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5935 if (orig_mode != BLKmode)
5936 tmp = gen_rtx_REG (orig_mode, regno);
5939 tmp = gen_rtx_REG (mode, regno);
5940 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5941 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5947 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5948 of this code is to classify each 8 bytes of the incoming argument by the register
5949 class and assign registers accordingly. */
5951 /* Return the union class of CLASS1 and CLASS2.
5952 See the x86-64 PS ABI for details. */
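/* E.g. (illustrative): for

     struct s { int i; float f; };

   the single eightbyte holds INTEGERSI (i) merged with SSE (f), which
   rule #4 below resolves to INTEGER, so the whole struct travels in
   one general-purpose register.  */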
5954 static enum x86_64_reg_class
5955 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5957 /* Rule #1: If both classes are equal, this is the resulting class. */
5958 if (class1 == class2)
5961 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5963 if (class1 == X86_64_NO_CLASS)
5965 if (class2 == X86_64_NO_CLASS)
5968 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5969 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5970 return X86_64_MEMORY_CLASS;
5972 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5973 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5974 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5975 return X86_64_INTEGERSI_CLASS;
5976 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5977 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5978 return X86_64_INTEGER_CLASS;
5980 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5982 if (class1 == X86_64_X87_CLASS
5983 || class1 == X86_64_X87UP_CLASS
5984 || class1 == X86_64_COMPLEX_X87_CLASS
5985 || class2 == X86_64_X87_CLASS
5986 || class2 == X86_64_X87UP_CLASS
5987 || class2 == X86_64_COMPLEX_X87_CLASS)
5988 return X86_64_MEMORY_CLASS;
5990 /* Rule #6: Otherwise class SSE is used. */
5991 return X86_64_SSE_CLASS;
5994 /* Classify the argument of type TYPE and mode MODE.
5995 CLASSES will be filled by the register class used to pass each word
5996 of the operand. The number of words is returned. In case the parameter
5997 should be passed in memory, 0 is returned. As a special case for zero
5998 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6000 BIT_OFFSET is used internally for handling records and specifies the
6001 offset in bits modulo 256 to avoid overflow cases.
6003 See the x86-64 PS ABI for details.
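/* E.g. (illustrative): for

     struct s { double d; long l; };

   this sets CLASSES[0] = X86_64_SSEDF_CLASS and
   CLASSES[1] = X86_64_INTEGER_CLASS and returns 2, so D ends up in an
   SSE register and L in a general-purpose register.  */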
6007 classify_argument (enum machine_mode mode, const_tree type,
6008 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6010 HOST_WIDE_INT bytes =
6011 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6013 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6015 /* Variable sized entities are always passed/returned in memory. */
6019 if (mode != VOIDmode
6020 && targetm.calls.must_pass_in_stack (mode, type))
6023 if (type && AGGREGATE_TYPE_P (type))
6027 enum x86_64_reg_class subclasses[MAX_CLASSES];
6029 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6033 for (i = 0; i < words; i++)
6034 classes[i] = X86_64_NO_CLASS;
6036 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6037 signal the memory class, so handle it as a special case. */
6040 classes[0] = X86_64_NO_CLASS;
6044 /* Classify each field of record and merge classes. */
6045 switch (TREE_CODE (type))
6048 /* And now merge the fields of structure. */
6049 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6051 if (TREE_CODE (field) == FIELD_DECL)
6055 if (TREE_TYPE (field) == error_mark_node)
6058 /* Bitfields are always classified as integer. Handle them
6059 early, since later code would consider them to be
6060 misaligned integers. */
6061 if (DECL_BIT_FIELD (field))
6063 for (i = (int_bit_position (field)
6064 + (bit_offset % 64)) / 8 / 8;
6065 i < ((int_bit_position (field) + (bit_offset % 64))
6066 + tree_low_cst (DECL_SIZE (field), 0)
6069 merge_classes (X86_64_INTEGER_CLASS,
6076 type = TREE_TYPE (field);
6078 /* Flexible array member is ignored. */
6079 if (TYPE_MODE (type) == BLKmode
6080 && TREE_CODE (type) == ARRAY_TYPE
6081 && TYPE_SIZE (type) == NULL_TREE
6082 && TYPE_DOMAIN (type) != NULL_TREE
6083 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6088 if (!warned && warn_psabi)
6091 inform (input_location,
6092 "the ABI of passing struct with"
6093 " a flexible array member has"
6094 " changed in GCC 4.4");
6098 num = classify_argument (TYPE_MODE (type), type,
6100 (int_bit_position (field)
6101 + bit_offset) % 256);
6104 pos = (int_bit_position (field)
6105 + (bit_offset % 64)) / 8 / 8;
6106 for (i = 0; i < num && (i + pos) < words; i++)
6108 merge_classes (subclasses[i], classes[i + pos]);
6115 /* Arrays are handled as small records. */
6118 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6119 TREE_TYPE (type), subclasses, bit_offset);
6123 /* The partial classes are now full classes. */
6124 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6125 subclasses[0] = X86_64_SSE_CLASS;
6126 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6127 && !((bit_offset % 64) == 0 && bytes == 4))
6128 subclasses[0] = X86_64_INTEGER_CLASS;
6130 for (i = 0; i < words; i++)
6131 classes[i] = subclasses[i % num];
6136 case QUAL_UNION_TYPE:
6137 /* Unions are similar to RECORD_TYPE but offset is always 0.
6139 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6141 if (TREE_CODE (field) == FIELD_DECL)
6145 if (TREE_TYPE (field) == error_mark_node)
6148 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6149 TREE_TYPE (field), subclasses,
6153 for (i = 0; i < num; i++)
6154 classes[i] = merge_classes (subclasses[i], classes[i]);
6165 /* When size > 16 bytes, if the first eightbyte isn't
6166 X86_64_SSE_CLASS or any of the other ones isn't
6167 X86_64_SSEUP_CLASS, everything should be passed in
6169 if (classes[0] != X86_64_SSE_CLASS)
6172 for (i = 1; i < words; i++)
6173 if (classes[i] != X86_64_SSEUP_CLASS)
6177 /* Final merger cleanup. */
6178 for (i = 0; i < words; i++)
6180 /* If one class is MEMORY, everything should be passed in
6182 if (classes[i] == X86_64_MEMORY_CLASS)
6185 /* The X86_64_SSEUP_CLASS should always be preceded by
6186 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6187 if (classes[i] == X86_64_SSEUP_CLASS
6188 && classes[i - 1] != X86_64_SSE_CLASS
6189 && classes[i - 1] != X86_64_SSEUP_CLASS)
6191 /* The first one should never be X86_64_SSEUP_CLASS. */
6192 gcc_assert (i != 0);
6193 classes[i] = X86_64_SSE_CLASS;
6196 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6197 everything should be passed in memory. */
6198 if (classes[i] == X86_64_X87UP_CLASS
6199 && (classes[i - 1] != X86_64_X87_CLASS))
6203 /* The first one should never be X86_64_X87UP_CLASS. */
6204 gcc_assert (i != 0);
6205 if (!warned && warn_psabi)
6208 inform (input_location,
6209 "the ABI of passing union with long double"
6210 " has changed in GCC 4.4");
6218 /* Compute alignment needed. We align all types to natural boundaries with
6219 the exception of XFmode, which is aligned to 64 bits. */
6220 if (mode != VOIDmode && mode != BLKmode)
6222 int mode_alignment = GET_MODE_BITSIZE (mode);
6225 mode_alignment = 128;
6226 else if (mode == XCmode)
6227 mode_alignment = 256;
6228 if (COMPLEX_MODE_P (mode))
6229 mode_alignment /= 2;
6230 /* Misaligned fields are always returned in memory. */
6231 if (bit_offset % mode_alignment)
6235 /* For V1xx modes, just use the base mode. */
6236 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6237 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6238 mode = GET_MODE_INNER (mode);
6240 /* Classification of atomic types. */
6245 classes[0] = X86_64_SSE_CLASS;
6248 classes[0] = X86_64_SSE_CLASS;
6249 classes[1] = X86_64_SSEUP_CLASS;
6259 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6263 classes[0] = X86_64_INTEGERSI_CLASS;
6266 else if (size <= 64)
6268 classes[0] = X86_64_INTEGER_CLASS;
6271 else if (size <= 64 + 32)
6273 classes[0] = X86_64_INTEGER_CLASS;
6274 classes[1] = X86_64_INTEGERSI_CLASS;
6277 else if (size <= 64 + 64)
6279 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6287 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6291 /* OImode shouldn't be used directly. */
6296 if (!(bit_offset % 64))
6297 classes[0] = X86_64_SSESF_CLASS;
6299 classes[0] = X86_64_SSE_CLASS;
6302 classes[0] = X86_64_SSEDF_CLASS;
6305 classes[0] = X86_64_X87_CLASS;
6306 classes[1] = X86_64_X87UP_CLASS;
6309 classes[0] = X86_64_SSE_CLASS;
6310 classes[1] = X86_64_SSEUP_CLASS;
6313 classes[0] = X86_64_SSE_CLASS;
6314 if (!(bit_offset % 64))
6320 if (!warned && warn_psabi)
6323 inform (input_location,
6324 "the ABI of passing structure with complex float"
6325 " member has changed in GCC 4.4");
6327 classes[1] = X86_64_SSESF_CLASS;
6331 classes[0] = X86_64_SSEDF_CLASS;
6332 classes[1] = X86_64_SSEDF_CLASS;
6335 classes[0] = X86_64_COMPLEX_X87_CLASS;
6338 /* This mode is larger than 16 bytes. */
6346 classes[0] = X86_64_SSE_CLASS;
6347 classes[1] = X86_64_SSEUP_CLASS;
6348 classes[2] = X86_64_SSEUP_CLASS;
6349 classes[3] = X86_64_SSEUP_CLASS;
6357 classes[0] = X86_64_SSE_CLASS;
6358 classes[1] = X86_64_SSEUP_CLASS;
6366 classes[0] = X86_64_SSE_CLASS;
6372 gcc_assert (VECTOR_MODE_P (mode));
6377 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6379 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6380 classes[0] = X86_64_INTEGERSI_CLASS;
6382 classes[0] = X86_64_INTEGER_CLASS;
6383 classes[1] = X86_64_INTEGER_CLASS;
6384 return 1 + (bytes > 8);
6388 /* Examine the argument and return the number of registers required in each
6389 class. Return 0 iff parameter should be passed in memory. */
6391 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6392 int *int_nregs, int *sse_nregs)
6394 enum x86_64_reg_class regclass[MAX_CLASSES];
6395 int n = classify_argument (mode, type, regclass, 0);
6401 for (n--; n >= 0; n--)
6402 switch (regclass[n])
6404 case X86_64_INTEGER_CLASS:
6405 case X86_64_INTEGERSI_CLASS:
6408 case X86_64_SSE_CLASS:
6409 case X86_64_SSESF_CLASS:
6410 case X86_64_SSEDF_CLASS:
6413 case X86_64_NO_CLASS:
6414 case X86_64_SSEUP_CLASS:
6416 case X86_64_X87_CLASS:
6417 case X86_64_X87UP_CLASS:
6421 case X86_64_COMPLEX_X87_CLASS:
6422 return in_return ? 2 : 0;
6423 case X86_64_MEMORY_CLASS:
6429 /* Construct container for the argument used by GCC interface. See
6430 FUNCTION_ARG for the detailed description. */
6433 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6434 const_tree type, int in_return, int nintregs, int nsseregs,
6435 const int *intreg, int sse_regno)
6437 /* The following variables hold the static issued_error state. */
6438 static bool issued_sse_arg_error;
6439 static bool issued_sse_ret_error;
6440 static bool issued_x87_ret_error;
6442 enum machine_mode tmpmode;
6444 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6445 enum x86_64_reg_class regclass[MAX_CLASSES];
6449 int needed_sseregs, needed_intregs;
6450 rtx exp[MAX_CLASSES];
6453 n = classify_argument (mode, type, regclass, 0);
6456 if (!examine_argument (mode, type, in_return, &needed_intregs,
6459 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6462 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6463 some less clueful developer tries to use floating-point anyway. */
6464 if (needed_sseregs && !TARGET_SSE)
6468 if (!issued_sse_ret_error)
6470 error ("SSE register return with SSE disabled");
6471 issued_sse_ret_error = true;
6474 else if (!issued_sse_arg_error)
6476 error ("SSE register argument with SSE disabled");
6477 issued_sse_arg_error = true;
6482 /* Likewise, error if the ABI requires us to return values in the
6483 x87 registers and the user specified -mno-80387. */
6484 if (!TARGET_80387 && in_return)
6485 for (i = 0; i < n; i++)
6486 if (regclass[i] == X86_64_X87_CLASS
6487 || regclass[i] == X86_64_X87UP_CLASS
6488 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6490 if (!issued_x87_ret_error)
6492 error ("x87 register return with x87 disabled");
6493 issued_x87_ret_error = true;
6498 /* First construct simple cases. Avoid SCmode, since we want to use a
6499 single register to pass this type. */
6500 if (n == 1 && mode != SCmode)
6501 switch (regclass[0])
6503 case X86_64_INTEGER_CLASS:
6504 case X86_64_INTEGERSI_CLASS:
6505 return gen_rtx_REG (mode, intreg[0]);
6506 case X86_64_SSE_CLASS:
6507 case X86_64_SSESF_CLASS:
6508 case X86_64_SSEDF_CLASS:
6509 if (mode != BLKmode)
6510 return gen_reg_or_parallel (mode, orig_mode,
6511 SSE_REGNO (sse_regno));
6513 case X86_64_X87_CLASS:
6514 case X86_64_COMPLEX_X87_CLASS:
6515 return gen_rtx_REG (mode, FIRST_STACK_REG);
6516 case X86_64_NO_CLASS:
6517 /* Zero sized array, struct or class. */
6523 && regclass[0] == X86_64_SSE_CLASS
6524 && regclass[1] == X86_64_SSEUP_CLASS
6526 return gen_reg_or_parallel (mode, orig_mode,
6527 SSE_REGNO (sse_regno));
6529 && regclass[0] == X86_64_SSE_CLASS
6530 && regclass[1] == X86_64_SSEUP_CLASS
6531 && regclass[2] == X86_64_SSEUP_CLASS
6532 && regclass[3] == X86_64_SSEUP_CLASS
6534 return gen_reg_or_parallel (mode, orig_mode,
6535 SSE_REGNO (sse_regno));
6537 && regclass[0] == X86_64_X87_CLASS
6538 && regclass[1] == X86_64_X87UP_CLASS)
6539 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6542 && regclass[0] == X86_64_INTEGER_CLASS
6543 && regclass[1] == X86_64_INTEGER_CLASS
6544 && (mode == CDImode || mode == TImode || mode == TFmode)
6545 && intreg[0] + 1 == intreg[1])
6546 return gen_rtx_REG (mode, intreg[0]);
6548 /* Otherwise figure out the entries of the PARALLEL. */
6549 for (i = 0; i < n; i++)
6553 switch (regclass[i])
6555 case X86_64_NO_CLASS:
6557 case X86_64_INTEGER_CLASS:
6558 case X86_64_INTEGERSI_CLASS:
6559 /* Merge TImodes on aligned occasions here too. */
6560 if (i * 8 + 8 > bytes)
6562 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6563 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6567 /* We've requested 24 bytes we
6568 don't have a mode for. Use DImode. */
6569 if (tmpmode == BLKmode)
6572 = gen_rtx_EXPR_LIST (VOIDmode,
6573 gen_rtx_REG (tmpmode, *intreg),
6577 case X86_64_SSESF_CLASS:
6579 = gen_rtx_EXPR_LIST (VOIDmode,
6580 gen_rtx_REG (SFmode,
6581 SSE_REGNO (sse_regno)),
6585 case X86_64_SSEDF_CLASS:
6587 = gen_rtx_EXPR_LIST (VOIDmode,
6588 gen_rtx_REG (DFmode,
6589 SSE_REGNO (sse_regno)),
6593 case X86_64_SSE_CLASS:
6601 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6611 && regclass[1] == X86_64_SSEUP_CLASS
6612 && regclass[2] == X86_64_SSEUP_CLASS
6613 && regclass[3] == X86_64_SSEUP_CLASS);
6621 = gen_rtx_EXPR_LIST (VOIDmode,
6622 gen_rtx_REG (tmpmode,
6623 SSE_REGNO (sse_regno)),
6632 /* Empty aligned struct, union or class. */
6636 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6637 for (i = 0; i < nexps; i++)
6638 XVECEXP (ret, 0, i) = exp [i];
6642 /* Update the data in CUM to advance over an argument of mode MODE
6643 and data type TYPE. (TYPE is null for libcalls where that information
6644 may not be available.) */
6647 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6648 const_tree type, HOST_WIDE_INT bytes,
6649 HOST_WIDE_INT words)
6665 cum->words += words;
6666 cum->nregs -= words;
6667 cum->regno += words;
6669 if (cum->nregs <= 0)
6677 /* OImode shouldn't be used directly. */
6681 if (cum->float_in_sse < 2)
6684 if (cum->float_in_sse < 1)
6701 if (!type || !AGGREGATE_TYPE_P (type))
6703 cum->sse_words += words;
6704 cum->sse_nregs -= 1;
6705 cum->sse_regno += 1;
6706 if (cum->sse_nregs <= 0)
6720 if (!type || !AGGREGATE_TYPE_P (type))
6722 cum->mmx_words += words;
6723 cum->mmx_nregs -= 1;
6724 cum->mmx_regno += 1;
6725 if (cum->mmx_nregs <= 0)
6736 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6737 const_tree type, HOST_WIDE_INT words, bool named)
6739 int int_nregs, sse_nregs;
6741 /* Unnamed 256bit vector mode parameters are passed on stack. */
6742 if (!named && VALID_AVX256_REG_MODE (mode))
6745 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6746 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6748 cum->nregs -= int_nregs;
6749 cum->sse_nregs -= sse_nregs;
6750 cum->regno += int_nregs;
6751 cum->sse_regno += sse_nregs;
6755 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6756 cum->words = (cum->words + align - 1) & ~(align - 1);
6757 cum->words += words;
6762 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6763 HOST_WIDE_INT words)
6765 /* Otherwise, this should be passed indirectly. */
6766 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6768 cum->words += words;
6776 /* Update the data in CUM to advance over an argument of mode MODE and
6777 data type TYPE. (TYPE is null for libcalls where that information
6778 may not be available.) */
6781 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6782 const_tree type, bool named)
6784 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6785 HOST_WIDE_INT bytes, words;
6787 if (mode == BLKmode)
6788 bytes = int_size_in_bytes (type);
6790 bytes = GET_MODE_SIZE (mode);
6791 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6794 mode = type_natural_mode (type, NULL);
6796 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6797 function_arg_advance_ms_64 (cum, bytes, words);
6798 else if (TARGET_64BIT)
6799 function_arg_advance_64 (cum, mode, type, words, named);
6801 function_arg_advance_32 (cum, mode, type, bytes, words);
6804 /* Define where to put the arguments to a function.
6805 Value is zero to push the argument on the stack,
6806 or a hard register in which to store the argument.
6808 MODE is the argument's machine mode.
6809 TYPE is the data type of the argument (as a tree).
6810 This is null for libcalls where that information may
6812 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6813 the preceding args and about the function being called.
6814 NAMED is nonzero if this argument is a named parameter
6815 (otherwise it is an extra parameter matching an ellipsis). */
6818 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6819 enum machine_mode orig_mode, const_tree type,
6820 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6822 static bool warnedsse, warnedmmx;
6824 /* Avoid the AL settings for the Unix64 ABI. */
6825 if (mode == VOIDmode)
6841 if (words <= cum->nregs)
6843 int regno = cum->regno;
6845 /* Fastcall allocates the first two DWORD (SImode) or
6846 smaller arguments to ECX and EDX if it isn't an
6852 || (type && AGGREGATE_TYPE_P (type)))
6855 /* ECX, not EAX, is the first allocated register. */
6856 if (regno == AX_REG)
6859 return gen_rtx_REG (mode, regno);
6864 if (cum->float_in_sse < 2)
6867 if (cum->float_in_sse < 1)
6871 /* In 32bit, we pass TImode in xmm registers. */
6878 if (!type || !AGGREGATE_TYPE_P (type))
6880 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6883 warning (0, "SSE vector argument without SSE enabled "
6887 return gen_reg_or_parallel (mode, orig_mode,
6888 cum->sse_regno + FIRST_SSE_REG);
6893 /* OImode shouldn't be used directly. */
6902 if (!type || !AGGREGATE_TYPE_P (type))
6905 return gen_reg_or_parallel (mode, orig_mode,
6906 cum->sse_regno + FIRST_SSE_REG);
6916 if (!type || !AGGREGATE_TYPE_P (type))
6918 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6921 warning (0, "MMX vector argument without MMX enabled "
6925 return gen_reg_or_parallel (mode, orig_mode,
6926 cum->mmx_regno + FIRST_MMX_REG);
6935 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6936 enum machine_mode orig_mode, const_tree type, bool named)
6938 /* Handle a hidden AL argument containing number of registers
6939 for varargs x86-64 functions. */
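/* E.g. (illustrative): for a call such as printf ("%f", d) the caller
   loads %al with 1, an upper bound on the number of SSE registers
   used, so the callee's prologue knows whether the SSE save area must
   be spilled.  */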
6940 if (mode == VOIDmode)
6941 return GEN_INT (cum->maybe_vaarg
6942 ? (cum->sse_nregs < 0
6943 ? X86_64_SSE_REGPARM_MAX
6958 /* Unnamed 256bit vector mode parameters are passed on stack. */
6964 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6966 &x86_64_int_parameter_registers [cum->regno],
6971 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6972 enum machine_mode orig_mode, bool named,
6973 HOST_WIDE_INT bytes)
6977 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6978 We use the value of -2 to specify that the current function call is MS ABI. */
6979 if (mode == VOIDmode)
6980 return GEN_INT (-2);
6982 /* If we've run out of registers, it goes on the stack. */
6983 if (cum->nregs == 0)
6986 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6988 /* Only floating point modes are passed in anything but integer regs. */
6989 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6992 regno = cum->regno + FIRST_SSE_REG;
6997 /* Unnamed floating parameters are passed in both the
6998 SSE and integer registers. */
6999 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7000 t2 = gen_rtx_REG (mode, regno);
7001 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7002 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7003 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7006 /* Handle aggregate types passed in registers. */
7007 if (orig_mode == BLKmode)
7009 if (bytes > 0 && bytes <= 8)
7010 mode = (bytes > 4 ? DImode : SImode);
7011 if (mode == BLKmode)
7015 return gen_reg_or_parallel (mode, orig_mode, regno);
7018 /* Return where to put the arguments to a function.
7019 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7021 MODE is the argument's machine mode. TYPE is the data type of the
7022 argument. It is null for libcalls where that information may not be
7023 available. CUM gives information about the preceding args and about
7024 the function being called. NAMED is nonzero if this argument is a
7025 named parameter (otherwise it is an extra parameter matching an
7029 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7030 const_tree type, bool named)
7032 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7033 enum machine_mode mode = omode;
7034 HOST_WIDE_INT bytes, words;
7037 if (mode == BLKmode)
7038 bytes = int_size_in_bytes (type);
7040 bytes = GET_MODE_SIZE (mode);
7041 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7043 /* To simplify the code below, represent vector types with a vector mode
7044 even if MMX/SSE are not active. */
7045 if (type && TREE_CODE (type) == VECTOR_TYPE)
7046 mode = type_natural_mode (type, cum);
7048 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7049 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7050 else if (TARGET_64BIT)
7051 arg = function_arg_64 (cum, mode, omode, type, named);
7053 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7058 /* A C expression that indicates when an argument must be passed by
7059 reference. If nonzero for an argument, a copy of that argument is
7060 made in memory and a pointer to the argument is passed instead of
7061 the argument itself. The pointer is passed in whatever way is
7062 appropriate for passing a pointer to that type. */
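/* E.g. (illustrative): under the 64-bit MS ABI an argument of type

     struct s { char c[12]; };

   (12 bytes, not 1, 2, 4 or 8) is copied to memory and a pointer to
   the copy is passed, while an 8-byte struct is passed directly in a
   register.  */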
7065 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
7066 enum machine_mode mode ATTRIBUTE_UNUSED,
7067 const_tree type, bool named ATTRIBUTE_UNUSED)
7069 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7071 /* See Windows x64 Software Convention. */
7072 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7074 int msize = (int) GET_MODE_SIZE (mode);
7077 /* Arrays are passed by reference. */
7078 if (TREE_CODE (type) == ARRAY_TYPE)
7081 if (AGGREGATE_TYPE_P (type))
7083 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7084 are passed by reference. */
7085 msize = int_size_in_bytes (type);
7089 /* __m128 is passed by reference. */
7091 case 1: case 2: case 4: case 8:
7097 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7103 /* Return true when TYPE should be 128bit aligned for 32bit argument
7104 passing ABI. XXX: This function is obsolete and is only used for
7105 checking psABI compatibility with previous versions of GCC. */
7108 ix86_compat_aligned_value_p (const_tree type)
7110 enum machine_mode mode = TYPE_MODE (type);
7111 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7115 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7117 if (TYPE_ALIGN (type) < 128)
7120 if (AGGREGATE_TYPE_P (type))
7122 /* Walk the aggregates recursively. */
7123 switch (TREE_CODE (type))
7127 case QUAL_UNION_TYPE:
7131 /* Walk all the structure fields. */
7132 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7134 if (TREE_CODE (field) == FIELD_DECL
7135 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7142 /* Just for use if some languages pass arrays by value. */
7143 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7154 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7155 XXX: This function is obsolete and is only used for checking psABI
7156 compatibility with previous versions of GCC. */
7159 ix86_compat_function_arg_boundary (enum machine_mode mode,
7160 const_tree type, unsigned int align)
7162 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7163 natural boundaries. */
7164 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7166 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7167 make an exception for SSE modes since these require 128bit
7170 The handling here differs from field_alignment. ICC aligns MMX
7171 arguments to 4 byte boundaries, while structure fields are aligned
7172 to 8 byte boundaries. */
7175 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7176 align = PARM_BOUNDARY;
7180 if (!ix86_compat_aligned_value_p (type))
7181 align = PARM_BOUNDARY;
7184 if (align > BIGGEST_ALIGNMENT)
7185 align = BIGGEST_ALIGNMENT;
7189 /* Return true when TYPE should be 128bit aligned for 32bit argument
7193 ix86_contains_aligned_value_p (const_tree type)
7195 enum machine_mode mode = TYPE_MODE (type);
7197 if (mode == XFmode || mode == XCmode)
7200 if (TYPE_ALIGN (type) < 128)
7203 if (AGGREGATE_TYPE_P (type))
7205 /* Walk the aggregates recursively. */
7206 switch (TREE_CODE (type))
7210 case QUAL_UNION_TYPE:
7214 /* Walk all the structure fields. */
7215 for (field = TYPE_FIELDS (type);
7217 field = DECL_CHAIN (field))
7219 if (TREE_CODE (field) == FIELD_DECL
7220 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7227 /* Just for use if some languages pass arrays by value. */
7228 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7237 return TYPE_ALIGN (type) >= 128;
7242 /* Gives the alignment boundary, in bits, of an argument with the
7243 specified mode and type. */
7246 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7251 /* Since the main variant type is used for the call, convert the
7252 type to its main variant. */
7253 type = TYPE_MAIN_VARIANT (type);
7254 align = TYPE_ALIGN (type);
7257 align = GET_MODE_ALIGNMENT (mode);
7258 if (align < PARM_BOUNDARY)
7259 align = PARM_BOUNDARY;
7263 unsigned int saved_align = align;
7267 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7270 if (mode == XFmode || mode == XCmode)
7271 align = PARM_BOUNDARY;
7273 else if (!ix86_contains_aligned_value_p (type))
7274 align = PARM_BOUNDARY;
7277 align = PARM_BOUNDARY;
7282 && align != ix86_compat_function_arg_boundary (mode, type,
7286 inform (input_location,
7287 "The ABI for passing parameters with %d-byte"
7288 " alignment has changed in GCC 4.6",
7289 align / BITS_PER_UNIT);
7296 /* Return true if N is a possible register number of function value. */
7299 ix86_function_value_regno_p (const unsigned int regno)
7306 case FIRST_FLOAT_REG:
7307 /* TODO: The function should depend on current function ABI but
7308 builtins.c would need updating then. Therefore we use the
7310 if (TARGET_64BIT && ix86_abi == MS_ABI)
7312 return TARGET_FLOAT_RETURNS_IN_80387;
7318 if (TARGET_MACHO || TARGET_64BIT)
7326 /* Define how to find the value returned by a function.
7327 VALTYPE is the data type of the value (as a tree).
7328 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7329 otherwise, FUNC is 0. */
7332 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7333 const_tree fntype, const_tree fn)
7337 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7338 we normally prevent this case when mmx is not available. However
7339 some ABIs may require the result to be returned like DImode. */
7340 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7341 regno = FIRST_MMX_REG;
7343 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7344 we prevent this case when sse is not available. However some ABIs
7345 may require the result to be returned like integer TImode. */
7346 else if (mode == TImode
7347 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7348 regno = FIRST_SSE_REG;
7350 /* 32-byte vector modes in %ymm0. */
7351 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7352 regno = FIRST_SSE_REG;
7354 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7355 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7356 regno = FIRST_FLOAT_REG;
7358 /* Most things go in %eax. */
7361 /* Override FP return register with %xmm0 for local functions when
7362 SSE math is enabled or for functions with sseregparm attribute. */
7363 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7365 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7366 if ((sse_level >= 1 && mode == SFmode)
7367 || (sse_level == 2 && mode == DFmode))
7368 regno = FIRST_SSE_REG;
7371 /* OImode shouldn't be used directly. */
7372 gcc_assert (mode != OImode);
7374 return gen_rtx_REG (orig_mode, regno);
7378 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7383 /* Handle libcalls, which don't provide a type node. */
7384 if (valtype == NULL)
7398 regno = FIRST_SSE_REG;
7402 regno = FIRST_FLOAT_REG;
7410 return gen_rtx_REG (mode, regno);
7412 else if (POINTER_TYPE_P (valtype))
7414 /* Pointers are always returned in word_mode. */
7418 ret = construct_container (mode, orig_mode, valtype, 1,
7419 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7420 x86_64_int_return_registers, 0);
7422 /* For zero sized structures, construct_container returns NULL, but we
7423 need to keep the rest of the compiler happy by returning a meaningful value. */
7425 ret = gen_rtx_REG (orig_mode, AX_REG);
7431 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7434 unsigned int regno = AX_REG;
7438 switch (GET_MODE_SIZE (mode))
7441 if (valtype != NULL_TREE
7442 && !VECTOR_INTEGER_TYPE_P (valtype)
7444 && !INTEGRAL_TYPE_P (valtype)
7445 && !VECTOR_FLOAT_TYPE_P (valtype))
7447 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7448 && !COMPLEX_MODE_P (mode))
7449 regno = FIRST_SSE_REG;
7453 if (mode == SFmode || mode == DFmode)
7454 regno = FIRST_SSE_REG;
7460 return gen_rtx_REG (orig_mode, regno);
7464 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7465 enum machine_mode orig_mode, enum machine_mode mode)
7467 const_tree fn, fntype;
7470 if (fntype_or_decl && DECL_P (fntype_or_decl))
7471 fn = fntype_or_decl;
7472 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7474 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7475 return function_value_ms_64 (orig_mode, mode, valtype);
7476 else if (TARGET_64BIT)
7477 return function_value_64 (orig_mode, mode, valtype);
7479 return function_value_32 (orig_mode, mode, fntype, fn);
7483 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7484 bool outgoing ATTRIBUTE_UNUSED)
7486 enum machine_mode mode, orig_mode;
7488 orig_mode = TYPE_MODE (valtype);
7489 mode = type_natural_mode (valtype, NULL);
7490 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7493 /* Pointer function arguments and return values are promoted to
7496 static enum machine_mode
7497 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7498 int *punsignedp, const_tree fntype,
7501 if (type != NULL_TREE && POINTER_TYPE_P (type))
7503 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7506 return default_promote_function_mode (type, mode, punsignedp, fntype,
7510 /* Return true if a structure, union or array with MODE containing FIELD
7511 should be accessed using BLKmode. */
7514 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
7516 /* Union with XFmode must be in BLKmode. */
7517 return (mode == XFmode
7518 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
7519 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
7523 ix86_libcall_value (enum machine_mode mode)
7525 return ix86_function_value_1 (NULL, NULL, mode, mode);
7528 /* Return true iff type is returned in memory. */
7530 static bool ATTRIBUTE_UNUSED
7531 return_in_memory_32 (const_tree type, enum machine_mode mode)
7535 if (mode == BLKmode)
7538 size = int_size_in_bytes (type);
7540 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7543 if (VECTOR_MODE_P (mode) || mode == TImode)
7545 /* User-created vectors small enough to fit in EAX. */
7549 /* MMX/3dNow values are returned in MM0,
7550 except when it doesn't exist or the ABI prescribes otherwise. */
7552 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7554 /* SSE values are returned in XMM0, except when it doesn't exist. */
7558 /* AVX values are returned in YMM0, except when it doesn't exist. */
7569 /* OImode shouldn't be used directly. */
7570 gcc_assert (mode != OImode);
7575 static bool ATTRIBUTE_UNUSED
7576 return_in_memory_64 (const_tree type, enum machine_mode mode)
7578 int needed_intregs, needed_sseregs;
7579 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7582 static bool ATTRIBUTE_UNUSED
7583 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7585 HOST_WIDE_INT size = int_size_in_bytes (type);
7587 /* __m128 is returned in xmm0. */
7588 if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
7589 || VECTOR_FLOAT_TYPE_P (type))
7590 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7591 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7594 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7595 return size != 1 && size != 2 && size != 4 && size != 8;
7599 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7601 #ifdef SUBTARGET_RETURN_IN_MEMORY
7602 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7604 const enum machine_mode mode = type_natural_mode (type, NULL);
7608 if (ix86_function_type_abi (fntype) == MS_ABI)
7609 return return_in_memory_ms_64 (type, mode);
7611 return return_in_memory_64 (type, mode);
7614 return return_in_memory_32 (type, mode);
7618 /* When returning SSE vector types, we have a choice of either
7619 (1) being ABI incompatible with a -march switch, or
7620 (2) generating an error.
7621 Given no good solution, I think the safest thing is one warning.
7622 The user won't be able to use -Werror, but....
7624 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7625 called in response to actually generating a caller or callee that
7626 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7627 via aggregate_value_p for general type probing from tree-ssa. */
7630 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7632 static bool warnedsse, warnedmmx;
7634 if (!TARGET_64BIT && type)
7636 /* Look at the return type of the function, not the function type. */
7637 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7639 if (!TARGET_SSE && !warnedsse)
7642 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7645 warning (0, "SSE vector return without SSE enabled "
7650 if (!TARGET_MMX && !warnedmmx)
7652 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7655 warning (0, "MMX vector return without MMX enabled "
7665 /* Create the va_list data type. */
7667 /* Returns the calling-convention-specific va_list data type.
7668 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7671 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7673 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7675 /* For i386 we use a plain pointer to the argument area. */
7676 if (!TARGET_64BIT || abi == MS_ABI)
7677 return build_pointer_type (char_type_node);
7679 record = lang_hooks.types.make_type (RECORD_TYPE);
7680 type_decl = build_decl (BUILTINS_LOCATION,
7681 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7683 f_gpr = build_decl (BUILTINS_LOCATION,
7684 FIELD_DECL, get_identifier ("gp_offset"),
7685 unsigned_type_node);
7686 f_fpr = build_decl (BUILTINS_LOCATION,
7687 FIELD_DECL, get_identifier ("fp_offset"),
7688 unsigned_type_node);
7689 f_ovf = build_decl (BUILTINS_LOCATION,
7690 FIELD_DECL, get_identifier ("overflow_arg_area"),
7692 f_sav = build_decl (BUILTINS_LOCATION,
7693 FIELD_DECL, get_identifier ("reg_save_area"),
7696 va_list_gpr_counter_field = f_gpr;
7697 va_list_fpr_counter_field = f_fpr;
7699 DECL_FIELD_CONTEXT (f_gpr) = record;
7700 DECL_FIELD_CONTEXT (f_fpr) = record;
7701 DECL_FIELD_CONTEXT (f_ovf) = record;
7702 DECL_FIELD_CONTEXT (f_sav) = record;
7704 TYPE_STUB_DECL (record) = type_decl;
7705 TYPE_NAME (record) = type_decl;
7706 TYPE_FIELDS (record) = f_gpr;
7707 DECL_CHAIN (f_gpr) = f_fpr;
7708 DECL_CHAIN (f_fpr) = f_ovf;
7709 DECL_CHAIN (f_ovf) = f_sav;
7711 layout_type (record);
7713 /* The correct type is an array type of one element. */
7714 return build_array_type (record, build_index_type (size_zero_node));
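/* For illustration only (editor's sketch, not part of the source): on
   64-bit SysV targets the record built above is what user code sees as
   va_list, roughly

       typedef struct __va_list_tag {
         unsigned int gp_offset;          offset to the next saved GP reg
         unsigned int fp_offset;          offset to the next saved SSE reg
         void *overflow_arg_area;         next stack-passed argument
         void *reg_save_area;             start of the register save area
       } va_list[1];

   with the one-element array type matching the build_array_type call
   above.  */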
7717 /* Set up the builtin va_list data type and, for 64-bit, the additional
7718 calling convention specific va_list data types. */
7721 ix86_build_builtin_va_list (void)
7723 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7725 /* Initialize ABI-specific va_list builtin types. */
7729 if (ix86_abi == MS_ABI)
7731 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7732 if (TREE_CODE (t) != RECORD_TYPE)
7733 t = build_variant_type_copy (t);
7734 sysv_va_list_type_node = t;
7739 if (TREE_CODE (t) != RECORD_TYPE)
7740 t = build_variant_type_copy (t);
7741 sysv_va_list_type_node = t;
7743 if (ix86_abi != MS_ABI)
7745 t = ix86_build_builtin_va_list_abi (MS_ABI);
7746 if (TREE_CODE (t) != RECORD_TYPE)
7747 t = build_variant_type_copy (t);
7748 ms_va_list_type_node = t;
7753 if (TREE_CODE (t) != RECORD_TYPE)
7754 t = build_variant_type_copy (t);
7755 ms_va_list_type_node = t;
7762 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7765 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7771 /* GPR size of varargs save area. */
7772 if (cfun->va_list_gpr_size)
7773 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7775 ix86_varargs_gpr_size = 0;
7777 /* FPR size of varargs save area. We don't need it if we don't pass
7778 anything in SSE registers. */
7779 if (TARGET_SSE && cfun->va_list_fpr_size)
7780 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7782 ix86_varargs_fpr_size = 0;
7784 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7787 save_area = frame_pointer_rtx;
7788 set = get_varargs_alias_set ();
7790 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7791 if (max > X86_64_REGPARM_MAX)
7792 max = X86_64_REGPARM_MAX;
7794 for (i = cum->regno; i < max; i++)
7796 mem = gen_rtx_MEM (word_mode,
7797 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
7798 MEM_NOTRAP_P (mem) = 1;
7799 set_mem_alias_set (mem, set);
7800 emit_move_insn (mem,
7801 gen_rtx_REG (word_mode,
7802 x86_64_int_parameter_registers[i]));
7805 if (ix86_varargs_fpr_size)
7807 enum machine_mode smode;
7810 /* Now emit code to save SSE registers. The AX parameter contains the number
7811 of SSE parameter registers used to call this function, though all we
7812 actually check here is the zero/non-zero status. */
7814 label = gen_label_rtx ();
7815 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7816 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7819 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7820 we used movdqa (i.e. TImode) instead? Perhaps even better would
7821 be if we could determine the real mode of the data, via a hook
7822 into pass_stdarg. Ignore all that for now. */
7824 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7825 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7827 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7828 if (max > X86_64_SSE_REGPARM_MAX)
7829 max = X86_64_SSE_REGPARM_MAX;
7831 for (i = cum->sse_regno; i < max; ++i)
7833 mem = plus_constant (Pmode, save_area,
7834 i * 16 + ix86_varargs_gpr_size);
7835 mem = gen_rtx_MEM (smode, mem);
7836 MEM_NOTRAP_P (mem) = 1;
7837 set_mem_alias_set (mem, set);
7838 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7840 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
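/* Editor's sketch of the resulting SysV register save area, assuming
   the full X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8:

       save_area +   0 .. +  47    six 8-byte GPR slots, in the
                                   x86_64_int_parameter_registers order
                                   (rdi, rsi, rdx, rcx, r8, r9)
       save_area +  48 .. + 175    eight 16-byte XMM slots

   The gp_offset and fp_offset fields of the va_list index into this
   block.  */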
7848 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7850 alias_set_type set = get_varargs_alias_set ();
7853 /* Reset to zero, as there might be a sysv vaarg used before. */
7855 ix86_varargs_gpr_size = 0;
7856 ix86_varargs_fpr_size = 0;
7858 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7862 mem = gen_rtx_MEM (Pmode,
7863 plus_constant (Pmode, virtual_incoming_args_rtx,
7864 i * UNITS_PER_WORD));
7865 MEM_NOTRAP_P (mem) = 1;
7866 set_mem_alias_set (mem, set);
7868 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7869 emit_move_insn (mem, reg);
7874 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7875 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7878 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7879 CUMULATIVE_ARGS next_cum;
7882 /* This argument doesn't appear to be used anymore, which is good,
7883 because the old code here didn't suppress rtl generation. */
7884 gcc_assert (!no_rtl);
7889 fntype = TREE_TYPE (current_function_decl);
7891 /* For varargs, we do not want to skip the dummy va_dcl argument.
7892 For stdargs, we do want to skip the last named argument. */
7894 if (stdarg_p (fntype))
7895 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7898 if (cum->call_abi == MS_ABI)
7899 setup_incoming_varargs_ms_64 (&next_cum);
7901 setup_incoming_varargs_64 (&next_cum);
7904 /* Check whether TYPE is the char * kind of va_list. */
7907 is_va_list_char_pointer (tree type)
7911 /* For 32-bit it is always true. */
7914 canonic = ix86_canonical_va_list_type (type);
7915 return (canonic == ms_va_list_type_node
7916 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7919 /* Implement va_start. */
7922 ix86_va_start (tree valist, rtx nextarg)
7924 HOST_WIDE_INT words, n_gpr, n_fpr;
7925 tree f_gpr, f_fpr, f_ovf, f_sav;
7926 tree gpr, fpr, ovf, sav, t;
7930 if (flag_split_stack
7931 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7933 unsigned int scratch_regno;
7935 /* When we are splitting the stack, we can't refer to the stack
7936 arguments using internal_arg_pointer, because they may be on
7937 the old stack. The split stack prologue will arrange to
7938 leave a pointer to the old stack arguments in a scratch
7939 register, which we here copy to a pseudo-register. The split
7940 stack prologue can't set the pseudo-register directly because
7941 it (the prologue) runs before any registers have been saved. */
7943 scratch_regno = split_stack_prologue_scratch_regno ();
7944 if (scratch_regno != INVALID_REGNUM)
7948 reg = gen_reg_rtx (Pmode);
7949 cfun->machine->split_stack_varargs_pointer = reg;
7952 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7956 push_topmost_sequence ();
7957 emit_insn_after (seq, entry_of_function ());
7958 pop_topmost_sequence ();
7962 /* Only the 64-bit target needs something special. */
7963 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7965 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7966 std_expand_builtin_va_start (valist, nextarg);
7971 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7972 next = expand_binop (ptr_mode, add_optab,
7973 cfun->machine->split_stack_varargs_pointer,
7974 crtl->args.arg_offset_rtx,
7975 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7976 convert_move (va_r, next, 0);
7981 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7982 f_fpr = DECL_CHAIN (f_gpr);
7983 f_ovf = DECL_CHAIN (f_fpr);
7984 f_sav = DECL_CHAIN (f_ovf);
7986 valist = build_simple_mem_ref (valist);
7987 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7988 /* The following should be folded into the MEM_REF offset. */
7989 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7991 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7993 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7995 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7998 /* Count number of gp and fp argument registers used. */
7999 words = crtl->args.info.words;
8000 n_gpr = crtl->args.info.regno;
8001 n_fpr = crtl->args.info.sse_regno;
8003 if (cfun->va_list_gpr_size)
8005 type = TREE_TYPE (gpr);
8006 t = build2 (MODIFY_EXPR, type,
8007 gpr, build_int_cst (type, n_gpr * 8));
8008 TREE_SIDE_EFFECTS (t) = 1;
8009 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8012 if (TARGET_SSE && cfun->va_list_fpr_size)
8014 type = TREE_TYPE (fpr);
8015 t = build2 (MODIFY_EXPR, type, fpr,
8016 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8017 TREE_SIDE_EFFECTS (t) = 1;
8018 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8021 /* Find the overflow area. */
8022 type = TREE_TYPE (ovf);
8023 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8024 ovf_rtx = crtl->args.internal_arg_pointer;
8026 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8027 t = make_tree (type, ovf_rtx);
8029 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8030 t = build2 (MODIFY_EXPR, type, ovf, t);
8031 TREE_SIDE_EFFECTS (t) = 1;
8032 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8034 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8036 /* Find the register save area.
8037 The function prologue saves it right above the stack frame. */
8038 type = TREE_TYPE (sav);
8039 t = make_tree (type, frame_pointer_rtx);
8040 if (!ix86_varargs_gpr_size)
8041 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8042 t = build2 (MODIFY_EXPR, type, sav, t);
8043 TREE_SIDE_EFFECTS (t) = 1;
8044 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
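/* Worked example (editor's addition): for int f (int a, ...), where one
   GPR and no SSE register hold named arguments, the assignments above
   produce

       gp_offset         = 1 * 8 = 8
       fp_offset         = 0 * 16 + 8 * X86_64_REGPARM_MAX = 48
       overflow_arg_area = incoming argument pointer + words * UNITS_PER_WORD
       reg_save_area     = the save area set up by the prologue

   so the first integer va_arg reads GPR slot 1 of the save area.  */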
8048 /* Implement va_arg. */
8051 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8054 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8055 tree f_gpr, f_fpr, f_ovf, f_sav;
8056 tree gpr, fpr, ovf, sav, t;
8058 tree lab_false, lab_over = NULL_TREE;
8063 enum machine_mode nat_mode;
8064 unsigned int arg_boundary;
8066 /* Only the 64-bit target needs something special. */
8067 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8068 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8070 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8071 f_fpr = DECL_CHAIN (f_gpr);
8072 f_ovf = DECL_CHAIN (f_fpr);
8073 f_sav = DECL_CHAIN (f_ovf);
8075 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8076 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8077 valist = build_va_arg_indirect_ref (valist);
8078 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8079 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8080 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8082 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8084 type = build_pointer_type (type);
8085 size = int_size_in_bytes (type);
8086 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8088 nat_mode = type_natural_mode (type, NULL);
8097 /* Unnamed 256bit vector mode parameters are passed on the stack. */
8098 if (!TARGET_64BIT_MS_ABI)
8105 container = construct_container (nat_mode, TYPE_MODE (type),
8106 type, 0, X86_64_REGPARM_MAX,
8107 X86_64_SSE_REGPARM_MAX, intreg,
8112 /* Pull the value out of the saved registers. */
8114 addr = create_tmp_var (ptr_type_node, "addr");
8118 int needed_intregs, needed_sseregs;
8120 tree int_addr, sse_addr;
8122 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8123 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8125 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8127 need_temp = (!REG_P (container)
8128 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8129 || TYPE_ALIGN (type) > 128));
8131 /* In case we are passing a structure, verify that it forms a consecutive
8132 block in the register save area. If not, we need to do moves. */
8133 if (!need_temp && !REG_P (container))
8135 /* Verify that all registers are strictly consecutive. */
8136 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8140 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8142 rtx slot = XVECEXP (container, 0, i);
8143 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8144 || INTVAL (XEXP (slot, 1)) != i * 16)
8152 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8154 rtx slot = XVECEXP (container, 0, i);
8155 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8156 || INTVAL (XEXP (slot, 1)) != i * 8)
8168 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8169 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8172 /* First ensure that we fit completely in registers. */
8175 t = build_int_cst (TREE_TYPE (gpr),
8176 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8177 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8178 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8179 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8180 gimplify_and_add (t, pre_p);
8184 t = build_int_cst (TREE_TYPE (fpr),
8185 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8186 + X86_64_REGPARM_MAX * 8);
8187 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8188 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8189 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8190 gimplify_and_add (t, pre_p);
8193 /* Compute index to start of area used for integer regs. */
8196 /* int_addr = gpr + sav; */
8197 t = fold_build_pointer_plus (sav, gpr);
8198 gimplify_assign (int_addr, t, pre_p);
8202 /* sse_addr = fpr + sav; */
8203 t = fold_build_pointer_plus (sav, fpr);
8204 gimplify_assign (sse_addr, t, pre_p);
8208 int i, prev_size = 0;
8209 tree temp = create_tmp_var (type, "va_arg_tmp");
8212 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8213 gimplify_assign (addr, t, pre_p);
8215 for (i = 0; i < XVECLEN (container, 0); i++)
8217 rtx slot = XVECEXP (container, 0, i);
8218 rtx reg = XEXP (slot, 0);
8219 enum machine_mode mode = GET_MODE (reg);
8225 tree dest_addr, dest;
8226 int cur_size = GET_MODE_SIZE (mode);
8228 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8229 prev_size = INTVAL (XEXP (slot, 1));
8230 if (prev_size + cur_size > size)
8232 cur_size = size - prev_size;
8233 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8234 if (mode == BLKmode)
8237 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8238 if (mode == GET_MODE (reg))
8239 addr_type = build_pointer_type (piece_type);
8241 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8243 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8246 if (SSE_REGNO_P (REGNO (reg)))
8248 src_addr = sse_addr;
8249 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8253 src_addr = int_addr;
8254 src_offset = REGNO (reg) * 8;
8256 src_addr = fold_convert (addr_type, src_addr);
8257 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8259 dest_addr = fold_convert (daddr_type, addr);
8260 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8261 if (cur_size == GET_MODE_SIZE (mode))
8263 src = build_va_arg_indirect_ref (src_addr);
8264 dest = build_va_arg_indirect_ref (dest_addr);
8266 gimplify_assign (dest, src, pre_p);
8271 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8272 3, dest_addr, src_addr,
8273 size_int (cur_size));
8274 gimplify_and_add (copy, pre_p);
8276 prev_size += cur_size;
8282 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8283 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8284 gimplify_assign (gpr, t, pre_p);
8289 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8290 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8291 gimplify_assign (fpr, t, pre_p);
8294 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8296 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8299 /* ... otherwise out of the overflow area. */
8301 /* When the caller aligns a parameter on the stack, a requested parameter
8302 alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
8303 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee with the caller
8304 here. */
8305 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8306 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8307 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8309 /* Care for on-stack alignment if needed. */
8310 if (arg_boundary <= 64 || size == 0)
8314 HOST_WIDE_INT align = arg_boundary / 8;
8315 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8316 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8317 build_int_cst (TREE_TYPE (t), -align));
8320 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8321 gimplify_assign (addr, t, pre_p);
8323 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8324 gimplify_assign (unshare_expr (ovf), t, pre_p);
8327 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8329 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8330 addr = fold_convert (ptrtype, addr);
8333 addr = build_va_arg_indirect_ref (addr);
8334 return build_va_arg_indirect_ref (addr);
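/* Editor's sketch of the GIMPLE generated above for va_arg (ap, int)
   (needed_intregs == 1, needed_sseregs == 0):

       if (ap.gp_offset >= 48) goto lab_false;    48 == (6 - 1 + 1) * 8
       addr = ap.reg_save_area + ap.gp_offset;
       ap.gp_offset += 8;
       goto lab_over;
     lab_false:
       addr = ap.overflow_arg_area;               aligned first if needed
       ap.overflow_arg_area = addr + 8;
     lab_over:
       result = *(int *) addr;

   matching the GE_EXPR bound, the pointer-plus assignments and the two
   labels built above.  */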
8337 /* Return true if OPNUM's MEM should be matched
8338 in movabs* patterns. */
8341 ix86_check_movabs (rtx insn, int opnum)
8345 set = PATTERN (insn);
8346 if (GET_CODE (set) == PARALLEL)
8347 set = XVECEXP (set, 0, 0);
8348 gcc_assert (GET_CODE (set) == SET);
8349 mem = XEXP (set, opnum);
8350 while (GET_CODE (mem) == SUBREG)
8351 mem = SUBREG_REG (mem);
8352 gcc_assert (MEM_P (mem));
8353 return volatile_ok || !MEM_VOLATILE_P (mem);
8356 /* Initialize the table of extra 80387 mathematical constants. */
8359 init_ext_80387_constants (void)
8361 static const char * cst[5] =
8363 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8364 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8365 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8366 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8367 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8371 for (i = 0; i < 5; i++)
8373 real_from_string (&ext_80387_constants_table[i], cst[i]);
8374 /* Ensure each constant is rounded to XFmode precision. */
8375 real_convert (&ext_80387_constants_table[i],
8376 XFmode, &ext_80387_constants_table[i]);
8379 ext_80387_constants_init = 1;
8382 /* Return non-zero if the constant is something that
8383 can be loaded with a special instruction. */
8386 standard_80387_constant_p (rtx x)
8388 enum machine_mode mode = GET_MODE (x);
8392 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8395 if (x == CONST0_RTX (mode))
8397 if (x == CONST1_RTX (mode))
8400 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8402 /* For XFmode constants, try to find a special 80387 instruction when
8403 optimizing for size or on those CPUs that benefit from them. */
8405 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8409 if (! ext_80387_constants_init)
8410 init_ext_80387_constants ();
8412 for (i = 0; i < 5; i++)
8413 if (real_identical (&r, &ext_80387_constants_table[i]))
8417 /* Load of the constant -0.0 or -1.0 will be split as
8418 fldz;fchs or fld1;fchs sequence. */
8419 if (real_isnegzero (&r))
8421 if (real_identical (&r, &dconstm1))
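/* Editor's summary (inferred from the visible cases): the return values
   of standard_80387_constant_p are 1 for 0.0 (fldz), 2 for 1.0 (fld1),
   3..7 for the five table entries above (fldlg2, fldln2, fldl2e,
   fldl2t, fldpi), plus distinct values for -0.0 and -1.0, which are
   later split into fldz;fchs and fld1;fchs as the comment notes.  */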
8427 /* Return the opcode of the special instruction to be used to load
8431 standard_80387_constant_opcode (rtx x)
8433 switch (standard_80387_constant_p (x))
8457 /* Return the CONST_DOUBLE representing the 80387 constant that is
8458 loaded by the specified special instruction. The argument IDX
8459 matches the return value from standard_80387_constant_p. */
8462 standard_80387_constant_rtx (int idx)
8466 if (! ext_80387_constants_init)
8467 init_ext_80387_constants ();
8483 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8487 /* Return 1 if X is all 0s and 2 if X is all 1s
8488 in supported SSE/AVX vector mode. */
8491 standard_sse_constant_p (rtx x)
8493 enum machine_mode mode = GET_MODE (x);
8495 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8497 if (vector_all_ones_operand (x, mode))
8519 /* Return the opcode of the special instruction to be used to load
8523 standard_sse_constant_opcode (rtx insn, rtx x)
8525 switch (standard_sse_constant_p (x))
8528 switch (get_attr_mode (insn))
8531 return "%vpxor\t%0, %d0";
8533 return "%vxorpd\t%0, %d0";
8535 return "%vxorps\t%0, %d0";
8538 return "vpxor\t%x0, %x0, %x0";
8540 return "vxorpd\t%x0, %x0, %x0";
8542 return "vxorps\t%x0, %x0, %x0";
8550 return "vpcmpeqd\t%0, %0, %0";
8552 return "pcmpeqd\t%0, %0";
8560 /* Return true if OP contains a symbol reference. */
8563 symbolic_reference_mentioned_p (rtx op)
8568 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8571 fmt = GET_RTX_FORMAT (GET_CODE (op));
8572 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8578 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8579 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8583 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8590 /* Return true if it is appropriate to emit `ret' instructions in the
8591 body of a function. Do this only if the epilogue is simple, needing a
8592 couple of insns. Prior to reloading, we can't tell how many registers
8593 must be saved, so return false then. Return false if there is no frame
8594 marker to de-allocate. */
8597 ix86_can_use_return_insn_p (void)
8599 struct ix86_frame frame;
8601 if (! reload_completed || frame_pointer_needed)
8604 /* Don't allow more than 32k pop, since that's all we can do
8605 with one instruction. */
8606 if (crtl->args.pops_args && crtl->args.size >= 32768)
8609 ix86_compute_frame_layout (&frame);
8610 return (frame.stack_pointer_offset == UNITS_PER_WORD
8611 && (frame.nregs + frame.nsseregs) == 0);
8614 /* Value should be nonzero if functions must have frame pointers.
8615 Zero means the frame pointer need not be set up (and parms may
8616 be accessed via the stack pointer) in functions that seem suitable. */
8619 ix86_frame_pointer_required (void)
8621 /* If we accessed previous frames, then the generated code expects
8622 to be able to access the saved ebp value in our frame. */
8623 if (cfun->machine->accesses_prev_frame)
8626 /* Several x86 OSes need a frame pointer for other reasons,
8627 usually pertaining to setjmp. */
8628 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8631 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8632 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
8635 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8636 allocation is 4GB. */
8637 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
8640 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8641 turns off the frame pointer by default. Turn it back on now if
8642 we've not got a leaf function. */
8643 if (TARGET_OMIT_LEAF_FRAME_POINTER
8645 || ix86_current_function_calls_tls_descriptor))
8648 if (crtl->profile && !flag_fentry)
8654 /* Record that the current function accesses previous call frames. */
8657 ix86_setup_frame_addresses (void)
8659 cfun->machine->accesses_prev_frame = 1;
8662 #ifndef USE_HIDDEN_LINKONCE
8663 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8664 # define USE_HIDDEN_LINKONCE 1
8666 # define USE_HIDDEN_LINKONCE 0
8670 static int pic_labels_used;
8672 /* Fills in the label name that should be used for a pc thunk for
8673 the given register. */
8676 get_pc_thunk_name (char name[32], unsigned int regno)
8678 gcc_assert (!TARGET_64BIT);
8680 if (USE_HIDDEN_LINKONCE)
8681 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8683 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8687 /* This function generates code for -fpic that loads %ebx with
8688 the return address of the caller and then returns. */
8691 ix86_code_end (void)
8696 for (regno = AX_REG; regno <= SP_REG; regno++)
8701 if (!(pic_labels_used & (1 << regno)))
8704 get_pc_thunk_name (name, regno);
8706 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8707 get_identifier (name),
8708 build_function_type_list (void_type_node, NULL_TREE));
8709 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8710 NULL_TREE, void_type_node);
8711 TREE_PUBLIC (decl) = 1;
8712 TREE_STATIC (decl) = 1;
8713 DECL_IGNORED_P (decl) = 1;
8718 switch_to_section (darwin_sections[text_coal_section]);
8719 fputs ("\t.weak_definition\t", asm_out_file);
8720 assemble_name (asm_out_file, name);
8721 fputs ("\n\t.private_extern\t", asm_out_file);
8722 assemble_name (asm_out_file, name);
8723 putc ('\n', asm_out_file);
8724 ASM_OUTPUT_LABEL (asm_out_file, name);
8725 DECL_WEAK (decl) = 1;
8729 if (USE_HIDDEN_LINKONCE)
8731 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8733 targetm.asm_out.unique_section (decl, 0);
8734 switch_to_section (get_named_section (decl, NULL, 0));
8736 targetm.asm_out.globalize_label (asm_out_file, name);
8737 fputs ("\t.hidden\t", asm_out_file);
8738 assemble_name (asm_out_file, name);
8739 putc ('\n', asm_out_file);
8740 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8744 switch_to_section (text_section);
8745 ASM_OUTPUT_LABEL (asm_out_file, name);
8748 DECL_INITIAL (decl) = make_node (BLOCK);
8749 current_function_decl = decl;
8750 init_function_start (decl);
8751 first_function_block_is_cold = false;
8752 /* Make sure unwind info is emitted for the thunk if needed. */
8753 final_start_function (emit_barrier (), asm_out_file, 1);
8755 /* Pad stack IP move with 4 instructions (two NOPs count
8756 as one instruction). */
8757 if (TARGET_PAD_SHORT_FUNCTION)
8762 fputs ("\tnop\n", asm_out_file);
8765 xops[0] = gen_rtx_REG (Pmode, regno);
8766 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8767 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8768 fputs ("\tret\n", asm_out_file);
8769 final_end_function ();
8770 init_insn_lengths ();
8771 free_after_compilation (cfun);
8773 current_function_decl = NULL;
8776 if (flag_split_stack)
8777 file_end_indicate_split_stack ();
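/* For reference (editor's addition): with regno == BX_REG the thunk
   emitted above assembles to roughly

     __x86.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret

   copying the return address -- the address of the instruction after
   the call -- into the PIC register before returning.  */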
8780 /* Emit code for the SET_GOT patterns. */
8783 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8789 if (TARGET_VXWORKS_RTP && flag_pic)
8791 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8792 xops[2] = gen_rtx_MEM (Pmode,
8793 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8794 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8796 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8797 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8798 an unadorned address. */
8799 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8800 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8801 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8805 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8809 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8811 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8814 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8815 is what will be referenced by the Mach-O PIC subsystem. */
8817 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8820 targetm.asm_out.internal_label (asm_out_file, "L",
8821 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8826 get_pc_thunk_name (name, REGNO (dest));
8827 pic_labels_used |= 1 << REGNO (dest);
8829 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8830 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8831 output_asm_insn ("call\t%X2", xops);
8832 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8833 is what will be referenced by the Mach-O PIC subsystem. */
8836 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8838 targetm.asm_out.internal_label (asm_out_file, "L",
8839 CODE_LABEL_NUMBER (label));
8844 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
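/* Editor's sketch of the sequence emitted above on a typical 32-bit ELF
   target when a pc thunk is used:

       call    __x86.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   after which the PIC register holds the GOT base used for @GOT and
   @GOTOFF references.  */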
8849 /* Generate a "push" pattern for input ARG. */
8854 struct machine_function *m = cfun->machine;
8856 if (m->fs.cfa_reg == stack_pointer_rtx)
8857 m->fs.cfa_offset += UNITS_PER_WORD;
8858 m->fs.sp_offset += UNITS_PER_WORD;
8860 if (REG_P (arg) && GET_MODE (arg) != word_mode)
8861 arg = gen_rtx_REG (word_mode, REGNO (arg));
8863 return gen_rtx_SET (VOIDmode,
8864 gen_rtx_MEM (word_mode,
8865 gen_rtx_PRE_DEC (Pmode,
8866 stack_pointer_rtx)),
8870 /* Generate a "pop" pattern for input ARG. */
8875 if (REG_P (arg) && GET_MODE (arg) != word_mode)
8876 arg = gen_rtx_REG (word_mode, REGNO (arg));
8878 return gen_rtx_SET (VOIDmode,
8880 gen_rtx_MEM (word_mode,
8881 gen_rtx_POST_INC (Pmode,
8882 stack_pointer_rtx)));
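/* Editor's note: schematically, the two helpers above build

     push:  (set (mem:W (pre_dec (reg sp))) (reg:W arg))
     pop:   (set (reg:W arg) (mem:W (post_inc (reg sp))))

   with W standing for word_mode; gen_push also advances the recorded
   CFA and stack-pointer offsets so later unwind bookkeeping stays
   consistent.  */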
8885 /* Return >= 0 if there is an unused call-clobbered register available
8886 for the entire function. */
8889 ix86_select_alt_pic_regnum (void)
8893 && !ix86_current_function_calls_tls_descriptor)
8896 /* Can't use the same register for both PIC and DRAP. */
8898 drap = REGNO (crtl->drap_reg);
8901 for (i = 2; i >= 0; --i)
8902 if (i != drap && !df_regs_ever_live_p (i))
8906 return INVALID_REGNUM;
8909 /* Return TRUE if we need to save REGNO. */
8912 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8914 if (pic_offset_table_rtx
8915 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8916 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8918 || crtl->calls_eh_return
8919 || crtl->uses_const_pool))
8920 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8922 if (crtl->calls_eh_return && maybe_eh_return)
8927 unsigned test = EH_RETURN_DATA_REGNO (i);
8928 if (test == INVALID_REGNUM)
8935 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8938 return (df_regs_ever_live_p (regno)
8939 && !call_used_regs[regno]
8940 && !fixed_regs[regno]
8941 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8944 /* Return the number of saved general purpose registers. */
8947 ix86_nsaved_regs (void)
8952 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8953 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8958 /* Return the number of saved SSE registers. */
8961 ix86_nsaved_sseregs (void)
8966 if (!TARGET_64BIT_MS_ABI)
8968 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8969 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8974 /* Given FROM and TO register numbers, say whether this elimination is
8975 allowed. If stack alignment is needed, we can only replace argument
8976 pointer with hard frame pointer, or replace frame pointer with stack
8977 pointer. Otherwise, frame pointer elimination is automatically
8978 handled and all other eliminations are valid. */
8981 ix86_can_eliminate (const int from, const int to)
8983 if (stack_realign_fp)
8984 return ((from == ARG_POINTER_REGNUM
8985 && to == HARD_FRAME_POINTER_REGNUM)
8986 || (from == FRAME_POINTER_REGNUM
8987 && to == STACK_POINTER_REGNUM));
8989 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8992 /* Return the offset between two registers, one to be eliminated, and the other
8993 its replacement, at the start of a routine. */
8996 ix86_initial_elimination_offset (int from, int to)
8998 struct ix86_frame frame;
8999 ix86_compute_frame_layout (&frame);
9001 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9002 return frame.hard_frame_pointer_offset;
9003 else if (from == FRAME_POINTER_REGNUM
9004 && to == HARD_FRAME_POINTER_REGNUM)
9005 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9008 gcc_assert (to == STACK_POINTER_REGNUM);
9010 if (from == ARG_POINTER_REGNUM)
9011 return frame.stack_pointer_offset;
9013 gcc_assert (from == FRAME_POINTER_REGNUM);
9014 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9018 /* In a dynamically-aligned function, we can't know the offset from
9019 stack pointer to frame pointer, so we must ensure that setjmp
9020 eliminates fp against the hard fp (%ebp) rather than trying to
9021 index from %esp up to the top of the frame across a gap that is
9022 of unknown (at compile-time) size. */
9024 ix86_builtin_setjmp_frame_value (void)
9026 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9029 /* When using -fsplit-stack, the allocation routines set a field in
9030 the TCB to the bottom of the stack plus this much space, measured
9033 #define SPLIT_STACK_AVAILABLE 256
9035 /* Fill in structure ix86_frame describing the frame of the current function. */
9038 ix86_compute_frame_layout (struct ix86_frame *frame)
9040 unsigned HOST_WIDE_INT stack_alignment_needed;
9041 HOST_WIDE_INT offset;
9042 unsigned HOST_WIDE_INT preferred_alignment;
9043 HOST_WIDE_INT size = get_frame_size ();
9044 HOST_WIDE_INT to_allocate;
9046 frame->nregs = ix86_nsaved_regs ();
9047 frame->nsseregs = ix86_nsaved_sseregs ();
9049 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9050 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9052 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9053 in function prologues and leaf functions. */
9054 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9055 && (!crtl->is_leaf || cfun->calls_alloca != 0
9056 || ix86_current_function_calls_tls_descriptor))
9058 preferred_alignment = 16;
9059 stack_alignment_needed = 16;
9060 crtl->preferred_stack_boundary = 128;
9061 crtl->stack_alignment_needed = 128;
9064 gcc_assert (!size || stack_alignment_needed);
9065 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9066 gcc_assert (preferred_alignment <= stack_alignment_needed);
9068 /* For SEH we have to limit the amount of code movement into the prologue.
9069 At present we do this via a BLOCKAGE, at which point there's very little
9070 scheduling that can be done, which means that there's very little point
9071 in doing anything except PUSHs. */
9073 cfun->machine->use_fast_prologue_epilogue = false;
9075 /* During reload iteration the number of registers saved can change.
9076 Recompute the value as needed. Do not recompute when the number of registers
9077 didn't change, as reload does multiple calls to the function and does not
9078 expect the decision to change within a single iteration. */
9079 else if (!optimize_function_for_size_p (cfun)
9080 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9082 int count = frame->nregs;
9083 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9085 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9087 /* The fast prologue uses move instead of push to save registers. This
9088 is significantly longer, but also executes faster as modern hardware
9089 can execute the moves in parallel, but can't do that for push/pop.
9091 Be careful about choosing which prologue to emit: when the function takes
9092 many instructions to execute, we may use the slow version, as well as when
9093 the function is known to be outside a hot spot (this is known with
9094 feedback only). Weight the size of the function by the number of registers
9095 to save, as it is cheap to use one or two push instructions but very
9096 slow to use many of them. */
9098 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9099 if (node->frequency < NODE_FREQUENCY_NORMAL
9100 || (flag_branch_probabilities
9101 && node->frequency < NODE_FREQUENCY_HOT))
9102 cfun->machine->use_fast_prologue_epilogue = false;
9104 cfun->machine->use_fast_prologue_epilogue
9105 = !expensive_function_p (count);
9108 frame->save_regs_using_mov
9109 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9110 /* If static stack checking is enabled and done with probes,
9111 the registers need to be saved before allocating the frame. */
9112 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9114 /* Skip return address. */
9115 offset = UNITS_PER_WORD;
9117 /* Skip pushed static chain. */
9118 if (ix86_static_chain_on_stack)
9119 offset += UNITS_PER_WORD;
9121 /* Skip saved base pointer. */
9122 if (frame_pointer_needed)
9123 offset += UNITS_PER_WORD;
9124 frame->hfp_save_offset = offset;
9126 /* The traditional frame pointer location is at the top of the frame. */
9127 frame->hard_frame_pointer_offset = offset;
9129 /* Register save area */
9130 offset += frame->nregs * UNITS_PER_WORD;
9131 frame->reg_save_offset = offset;
9133 /* On SEH target, registers are pushed just before the frame pointer
9136 frame->hard_frame_pointer_offset = offset;
9138 /* Align and set SSE register save area. */
9139 if (frame->nsseregs)
9141 /* The only ABI that has saved SSE registers (Win64) also has a
9142 16-byte aligned default stack, and thus we don't need to be
9143 within the re-aligned local stack frame to save them. */
9144 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9145 offset = (offset + 16 - 1) & -16;
9146 offset += frame->nsseregs * 16;
9148 frame->sse_reg_save_offset = offset;
9150 /* The re-aligned stack starts here. Values before this point are not
9151 directly comparable with values below this point. In order to make
9152 sure that no value happens to be the same before and after, force
9153 the alignment computation below to add a non-zero value. */
9154 if (stack_realign_fp)
9155 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9158 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9159 offset += frame->va_arg_size;
9161 /* Align start of frame for local function. */
9162 if (stack_realign_fp
9163 || offset != frame->sse_reg_save_offset
9166 || cfun->calls_alloca
9167 || ix86_current_function_calls_tls_descriptor)
9168 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9170 /* Frame pointer points here. */
9171 frame->frame_pointer_offset = offset;
9175 /* Add outgoing arguments area. Can be skipped if we eliminated
9176 all the function calls as dead code.
9177 Skipping is however impossible when the function calls alloca: the alloca
9178 expander assumes that the last crtl->outgoing_args_size
9179 bytes of the stack frame are unused. */
9180 if (ACCUMULATE_OUTGOING_ARGS
9181 && (!crtl->is_leaf || cfun->calls_alloca
9182 || ix86_current_function_calls_tls_descriptor))
9184 offset += crtl->outgoing_args_size;
9185 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9188 frame->outgoing_arguments_size = 0;
9190 /* Align stack boundary. Only needed if we're calling another function
9192 if (!crtl->is_leaf || cfun->calls_alloca
9193 || ix86_current_function_calls_tls_descriptor)
9194 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9196 /* We've reached the end of the stack frame. */
9197 frame->stack_pointer_offset = offset;
9199 /* Size prologue needs to allocate. */
9200 to_allocate = offset - frame->sse_reg_save_offset;
9202 if ((!to_allocate && frame->nregs <= 1)
9203 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9204 frame->save_regs_using_mov = false;
9206 if (ix86_using_red_zone ()
9207 && crtl->sp_is_unchanging
9209 && !ix86_current_function_calls_tls_descriptor)
9211 frame->red_zone_size = to_allocate;
9212 if (frame->save_regs_using_mov)
9213 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9214 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9215 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9218 frame->red_zone_size = 0;
9219 frame->stack_pointer_offset -= frame->red_zone_size;
9221 /* The SEH frame pointer location is near the bottom of the frame.
9222 This is enforced by the fact that the difference between the
9223 stack pointer and the frame pointer is limited to 240 bytes in
9224 the unwind data structure. */
9229 /* If we can leave the frame pointer where it is, do so; this also
9230 serves as the establisher frame for __builtin_frame_address (0). */
9231 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9232 if (diff <= SEH_MAX_FRAME_SIZE
9233 && (diff > 240 || (diff & 15) != 0)
9234 && !crtl->accesses_prior_frames)
9236 /* Ideally we'd determine what portion of the local stack frame
9237 (within the constraint of the lowest 240) is most heavily used.
9238 But without that complication, simply bias the frame pointer
9239 by 128 bytes so as to maximize the amount of the local stack
9240 frame that is addressable with 8-bit offsets. */
9241 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
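/* Editor's summary of the layout computed above, from the CFA (the
   incoming stack pointer) downwards:

       return address
       pushed static chain           if ix86_static_chain_on_stack
       saved frame pointer           if frame_pointer_needed
       GPR save area                 frame->nregs words
       SSE save area                 frame->nsseregs * 16, 16-aligned
       va_arg register save area
       local frame data              get_frame_size () bytes, aligned
       outgoing arguments            if ACCUMULATE_OUTGOING_ARGS

   Each *_offset field records the distance of its region below the
   CFA.  */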
9246 /* This is semi-inlined memory_address_length, but simplified
9247 since we know that we're always dealing with reg+offset, and
9248 to avoid having to create and discard all that rtl. */
9251 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9257 /* EBP and R13 cannot be encoded without an offset. */
9258 len = (regno == BP_REG || regno == R13_REG);
9260 else if (IN_RANGE (offset, -128, 127))
9263 /* ESP and R12 must be encoded with a SIB byte. */
9264 if (regno == SP_REG || regno == R12_REG)
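/* Worked examples for the estimate above (editor's addition): a base of
   %eax with offset 0 needs no extra bytes; %ebp or %r13 with offset 0
   still needs a disp8, so one extra byte; any base with an offset in
   [-128, 127] costs one disp8 byte instead of four disp32 bytes; and
   %esp or %r12 pays one additional SIB byte on top of that.  */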
9270 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9271 The valid base registers are taken from CFUN->MACHINE->FS. */
9274 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9276 const struct machine_function *m = cfun->machine;
9277 rtx base_reg = NULL;
9278 HOST_WIDE_INT base_offset = 0;
9280 if (m->use_fast_prologue_epilogue)
9282 /* Choose the base register most likely to allow the most scheduling
9283 opportunities. Generally FP is valid throughout the function,
9284 while DRAP must be reloaded within the epilogue. But choose either
9285 over the SP due to increased encoding size. */
9289 base_reg = hard_frame_pointer_rtx;
9290 base_offset = m->fs.fp_offset - cfa_offset;
9292 else if (m->fs.drap_valid)
9294 base_reg = crtl->drap_reg;
9295 base_offset = 0 - cfa_offset;
9297 else if (m->fs.sp_valid)
9299 base_reg = stack_pointer_rtx;
9300 base_offset = m->fs.sp_offset - cfa_offset;
9305 HOST_WIDE_INT toffset;
9308 /* Choose the base register with the smallest address encoding.
9309 With a tie, choose FP > DRAP > SP. */
9312 base_reg = stack_pointer_rtx;
9313 base_offset = m->fs.sp_offset - cfa_offset;
9314 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9316 if (m->fs.drap_valid)
9318 toffset = 0 - cfa_offset;
9319 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9322 base_reg = crtl->drap_reg;
9323 base_offset = toffset;
9329 toffset = m->fs.fp_offset - cfa_offset;
9330 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9333 base_reg = hard_frame_pointer_rtx;
9334 base_offset = toffset;
9339 gcc_assert (base_reg != NULL);
9341 return plus_constant (Pmode, base_reg, base_offset);
9344 /* Emit code to save registers in the prologue. */
9347 ix86_emit_save_regs (void)
9352 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9353 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9355 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9356 RTX_FRAME_RELATED_P (insn) = 1;
9360 /* Emit a single register save at CFA - CFA_OFFSET. */
9363 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9364 HOST_WIDE_INT cfa_offset)
9366 struct machine_function *m = cfun->machine;
9367 rtx reg = gen_rtx_REG (mode, regno);
9368 rtx mem, addr, base, insn;
9370 addr = choose_baseaddr (cfa_offset);
9371 mem = gen_frame_mem (mode, addr);
9373 /* For SSE saves, we need to indicate the 128-bit alignment. */
9374 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9376 insn = emit_move_insn (mem, reg);
9377 RTX_FRAME_RELATED_P (insn) = 1;
9380 if (GET_CODE (base) == PLUS)
9381 base = XEXP (base, 0);
9382 gcc_checking_assert (REG_P (base));
9384 /* When saving registers into a re-aligned local stack frame, avoid
9385 any tricky guessing by dwarf2out. */
9386 if (m->fs.realigned)
9388 gcc_checking_assert (stack_realign_drap);
9390 if (regno == REGNO (crtl->drap_reg))
9392 /* A bit of a hack. We force the DRAP register to be saved in
9393 the re-aligned stack frame, which provides us with a copy
9394 of the CFA that will last past the prologue. Install it. */
9395 gcc_checking_assert (cfun->machine->fs.fp_valid);
9396 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9397 cfun->machine->fs.fp_offset - cfa_offset);
9398 mem = gen_rtx_MEM (mode, addr);
9399 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9403 /* The frame pointer is a stable reference within the
9404 aligned frame. Use it. */
9405 gcc_checking_assert (cfun->machine->fs.fp_valid);
9406 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9407 cfun->machine->fs.fp_offset - cfa_offset);
9408 mem = gen_rtx_MEM (mode, addr);
9409 add_reg_note (insn, REG_CFA_EXPRESSION,
9410 gen_rtx_SET (VOIDmode, mem, reg));
9414 /* The memory may not be relative to the current CFA register,
9415 which means that we may need to generate a new pattern for
9416 use by the unwind info. */
9417 else if (base != m->fs.cfa_reg)
9419 addr = plus_constant (Pmode, m->fs.cfa_reg,
9420 m->fs.cfa_offset - cfa_offset);
9421 mem = gen_rtx_MEM (mode, addr);
9422 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9426 /* Emit code to save registers using MOV insns.
9427 First register is stored at CFA - CFA_OFFSET. */
9429 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9433 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9434 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9436 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9437 cfa_offset -= UNITS_PER_WORD;
9441 /* Emit code to save SSE registers using MOV insns.
9442 First register is stored at CFA - CFA_OFFSET. */
9444 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9448 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9449 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9451 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9456 static GTY(()) rtx queued_cfa_restores;
9458 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
9459 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9460 Don't add the note if the previously saved value will be left untouched
9461 within the stack red zone until return, as unwinders can find the same value
9462 in the register and on the stack. */
9465 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9467 if (!crtl->shrink_wrapped
9468 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9473 add_reg_note (insn, REG_CFA_RESTORE, reg);
9474 RTX_FRAME_RELATED_P (insn) = 1;
9478 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9481 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9484 ix86_add_queued_cfa_restore_notes (rtx insn)
9487 if (!queued_cfa_restores)
9489 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9491 XEXP (last, 1) = REG_NOTES (insn);
9492 REG_NOTES (insn) = queued_cfa_restores;
9493 queued_cfa_restores = NULL_RTX;
9494 RTX_FRAME_RELATED_P (insn) = 1;
9497 /* Expand prologue or epilogue stack adjustment.
9498 The pattern exists to put a dependency on all ebp-based memory accesses.
9499 STYLE should be negative if instructions should be marked as frame related,
9500 zero if the %r11 register is live and cannot be freely used, and positive
9504 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9505 int style, bool set_cfa)
9507 struct machine_function *m = cfun->machine;
9509 bool add_frame_related_expr = false;
9511 if (Pmode == SImode)
9512 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9513 else if (x86_64_immediate_operand (offset, DImode))
9514 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9518 /* r11 is used by indirect sibcall return as well, set before the
9519 epilogue and used after the epilogue. */
9521 tmp = gen_rtx_REG (DImode, R11_REG);
9524 gcc_assert (src != hard_frame_pointer_rtx
9525 && dest != hard_frame_pointer_rtx);
9526 tmp = hard_frame_pointer_rtx;
9528 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9530 add_frame_related_expr = true;
9532 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9535 insn = emit_insn (insn);
9537 ix86_add_queued_cfa_restore_notes (insn);
9543 gcc_assert (m->fs.cfa_reg == src);
9544 m->fs.cfa_offset += INTVAL (offset);
9545 m->fs.cfa_reg = dest;
9547 r = gen_rtx_PLUS (Pmode, src, offset);
9548 r = gen_rtx_SET (VOIDmode, dest, r);
9549 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9550 RTX_FRAME_RELATED_P (insn) = 1;
9554 RTX_FRAME_RELATED_P (insn) = 1;
9555 if (add_frame_related_expr)
9557 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9558 r = gen_rtx_SET (VOIDmode, dest, r);
9559 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9563 if (dest == stack_pointer_rtx)
9565 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9566 bool valid = m->fs.sp_valid;
9568 if (src == hard_frame_pointer_rtx)
9570 valid = m->fs.fp_valid;
9571 ooffset = m->fs.fp_offset;
9573 else if (src == crtl->drap_reg)
9575 valid = m->fs.drap_valid;
9580 /* Else there are two possibilities: SP itself, which we set
9581 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9582 taken care of by hand along the eh_return path. */
9583 gcc_checking_assert (src == stack_pointer_rtx
9584 || offset == const0_rtx);
9587 m->fs.sp_offset = ooffset - INTVAL (offset);
9588 m->fs.sp_valid = valid;
9592 /* Find an available register to be used as dynamic realign argument
9593 pointer register. Such a register will be written in the prologue and
9594 used at the beginning of the body, so it must not be
9595 1. parameter passing register.
9597 We reuse static-chain register if it is available. Otherwise, we
9598 use DI for i386 and R13 for x86-64. We chose R13 since it has
9601 Return: the regno of the chosen register. */
9604 find_drap_reg (void)
9606 tree decl = cfun->decl;
9610 /* Use R13 for a nested function or a function that needs a static chain.
9611 Since a function with a tail call may use any caller-saved
9612 registers in the epilogue, DRAP must not use a caller-saved
9613 register in such a case. */
9614 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9621 /* Use DI for a nested function or a function that needs a static chain.
9622 Since a function with a tail call may use any caller-saved
9623 registers in the epilogue, DRAP must not use a caller-saved
9624 register in such a case. */
9625 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9628 /* Reuse static chain register if it isn't used for parameter
9630 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9632 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9633 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9640 /* Return minimum incoming stack alignment. */
9643 ix86_minimum_incoming_stack_boundary (bool sibcall)
9645 unsigned int incoming_stack_boundary;
9647 /* Prefer the one specified at command line. */
9648 if (ix86_user_incoming_stack_boundary)
9649 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9650 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9651 when -mstackrealign is used, this isn't a sibcall check, and the
9652 estimated stack alignment is 128 bits. */
9655 && ix86_force_align_arg_pointer
9656 && crtl->stack_alignment_estimated == 128)
9657 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9659 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9661 /* Incoming stack alignment can be changed on individual functions
9662 via force_align_arg_pointer attribute. We use the smallest
9663 incoming stack boundary. */
9664 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9665 && lookup_attribute (ix86_force_align_arg_pointer_string,
9666 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9667 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9669 /* The incoming stack frame has to be aligned at least at
9670 parm_stack_boundary. */
9671 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9672 incoming_stack_boundary = crtl->parm_stack_boundary;
9674 /* The stack at the entrance of main is aligned by the runtime. We use the
9675 smallest incoming stack boundary. */
9676 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9677 && DECL_NAME (current_function_decl)
9678 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9679 && DECL_FILE_SCOPE_P (current_function_decl))
9680 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9682 return incoming_stack_boundary;
9685 /* Update incoming stack boundary and estimated stack alignment. */
9688 ix86_update_stack_boundary (void)
9690 ix86_incoming_stack_boundary
9691 = ix86_minimum_incoming_stack_boundary (false);
9693 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
9697 && crtl->stack_alignment_estimated < 128)
9698 crtl->stack_alignment_estimated = 128;
9701 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9702 needed or an rtx for DRAP otherwise. */
9705 ix86_get_drap_rtx (void)
9707 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9708 crtl->need_drap = true;
9710 if (stack_realign_drap)
9712 /* Assign DRAP to vDRAP and return vDRAP. */
9713 unsigned int regno = find_drap_reg ();
9718 arg_ptr = gen_rtx_REG (Pmode, regno);
9719 crtl->drap_reg = arg_ptr;
9722 drap_vreg = copy_to_reg (arg_ptr);
9726 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9729 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9730 RTX_FRAME_RELATED_P (insn) = 1;
9738 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9741 ix86_internal_arg_pointer (void)
9743 return virtual_incoming_args_rtx;
9746 struct scratch_reg {
9751 /* Return a short-lived scratch register for use on function entry.
9752 In 32-bit mode, it is valid only after the registers are saved
9753 in the prologue. This register must be released by means of
9754 release_scratch_register_on_entry once it is dead. */
9757 get_scratch_register_on_entry (struct scratch_reg *sr)
9765 /* We always use R11 in 64-bit mode. */
9770 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9772 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9774 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9775 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9776 int regparm = ix86_function_regparm (fntype, decl);
9778 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9780 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9781 for the static chain register. */
9782 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9783 && drap_regno != AX_REG)
9785 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
9786 for the static chain register. */
9787 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
9789 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
9791 /* ecx is the static chain register. */
9792 else if (regparm < 3 && !fastcall_p && !thiscall_p
9794 && drap_regno != CX_REG)
9796 else if (ix86_save_reg (BX_REG, true))
9798 /* esi is the static chain register. */
9799 else if (!(regparm == 3 && static_chain_p)
9800 && ix86_save_reg (SI_REG, true))
9802 else if (ix86_save_reg (DI_REG, true))
9806 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9811 sr->reg = gen_rtx_REG (Pmode, regno);
9814 rtx insn = emit_insn (gen_push (sr->reg));
9815 RTX_FRAME_RELATED_P (insn) = 1;
9819 /* Release a scratch register obtained from the preceding function. */
9822 release_scratch_register_on_entry (struct scratch_reg *sr)
9826 struct machine_function *m = cfun->machine;
9827 rtx x, insn = emit_insn (gen_pop (sr->reg));
9829 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9830 RTX_FRAME_RELATED_P (insn) = 1;
9831 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9832 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9833 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9834 m->fs.sp_offset -= UNITS_PER_WORD;
9838 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9840 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9843 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9845 /* We skip the probe for the first interval + a small dope of 4 words and
9846 probe that many bytes past the specified size to maintain a protection
9847 area at the bottom of the stack. */
9848 const int dope = 4 * UNITS_PER_WORD;
9849 rtx size_rtx = GEN_INT (size), last;
9851 /* See if we have a constant small number of probes to generate. If so,
9852 that's the easy case. The run-time loop is made up of 11 insns in the
9853 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9854 for n # of intervals. */
9855 if (size <= 5 * PROBE_INTERVAL)
9857 HOST_WIDE_INT i, adjust;
9858 bool first_probe = true;
9860 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9861 values of N from 1 until it exceeds SIZE. If only one probe is
9862 needed, this will not generate any code. Then adjust and probe
9863 to PROBE_INTERVAL + SIZE. */
9864 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9868 adjust = 2 * PROBE_INTERVAL + dope;
9869 first_probe = false;
9872 adjust = PROBE_INTERVAL;
9874 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9875 plus_constant (Pmode, stack_pointer_rtx,
9877 emit_stack_probe (stack_pointer_rtx);
9881 adjust = size + PROBE_INTERVAL + dope;
9883 adjust = size + PROBE_INTERVAL - i;
9885 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9886 plus_constant (Pmode, stack_pointer_rtx,
9888 emit_stack_probe (stack_pointer_rtx);
9890 /* Adjust back to account for the additional first interval. */
9891 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9892 plus_constant (Pmode, stack_pointer_rtx,
9893 PROBE_INTERVAL + dope)));
9896 /* Otherwise, do the same as above, but in a loop. Note that we must be
9897 extra careful with variables wrapping around because we might be at
9898 the very top (or the very bottom) of the address space and we have
9899 to be able to handle this case properly; in particular, we use an
9900 equality test for the loop condition. */
9903 HOST_WIDE_INT rounded_size;
9904 struct scratch_reg sr;
9906 get_scratch_register_on_entry (&sr);
9909 /* Step 1: round SIZE to the previous multiple of the interval. */
9911 rounded_size = size & -PROBE_INTERVAL;
9914 /* Step 2: compute initial and final value of the loop counter. */
9916 /* SP = SP_0 + PROBE_INTERVAL. */
9917 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9918 plus_constant (Pmode, stack_pointer_rtx,
9919 - (PROBE_INTERVAL + dope))));
9921 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9922 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9923 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9924 gen_rtx_PLUS (Pmode, sr.reg,
9925 stack_pointer_rtx)));
9928 /* Step 3: the loop
9930 while (SP != LAST_ADDR)
9931 {
9932 SP = SP + PROBE_INTERVAL
9933 probe at SP
9934 }
9936 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9937 values of N from 1 until it is equal to ROUNDED_SIZE. */
9939 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9942 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9943 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9945 if (size != rounded_size)
9947 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9948 plus_constant (Pmode, stack_pointer_rtx,
9949 rounded_size - size)));
9950 emit_stack_probe (stack_pointer_rtx);
9953 /* Adjust back to account for the additional first interval. */
9954 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9955 plus_constant (Pmode, stack_pointer_rtx,
9956 PROBE_INTERVAL + dope)));
9958 release_scratch_register_on_entry (&sr);
9961 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9963 /* Even if the stack pointer isn't the CFA register, we need to correctly
9964 describe the adjustments made to it, in particular differentiate the
9965 frame-related ones from the frame-unrelated ones. */
9968 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9969 XVECEXP (expr, 0, 0)
9970 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9971 plus_constant (Pmode, stack_pointer_rtx, -size));
9972 XVECEXP (expr, 0, 1)
9973 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9974 plus_constant (Pmode, stack_pointer_rtx,
9975 PROBE_INTERVAL + dope + size));
9976 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9977 RTX_FRAME_RELATED_P (last) = 1;
9979 cfun->machine->fs.sp_offset += size;
9982 /* Make sure nothing is scheduled before we are done. */
9983 emit_insn (gen_blockage ());
9986 /* Adjust the stack pointer up to REG while probing it. */
9988 static const char *
9989 output_adjust_stack_and_probe (rtx reg)
9991 static int labelno = 0;
9992 char loop_lab[32], end_lab[32];
9993 rtx xops[2];
9995 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9996 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9998 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10000 /* Jump to END_LAB if SP == LAST_ADDR. */
10001 xops[0] = stack_pointer_rtx;
10002 xops[1] = reg;
10003 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10004 fputs ("\tje\t", asm_out_file);
10005 assemble_name_raw (asm_out_file, end_lab);
10006 fputc ('\n', asm_out_file);
10008 /* SP = SP + PROBE_INTERVAL. */
10009 xops[1] = GEN_INT (PROBE_INTERVAL);
10010 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10013 xops[1] = const0_rtx;
10014 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10016 fprintf (asm_out_file, "\tjmp\t");
10017 assemble_name_raw (asm_out_file, loop_lab);
10018 fputc ('\n', asm_out_file);
10020 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
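/* Illustrative expansion of the sequence above (an added sketch, not
   generated output; AT&T syntax, 32-bit, assuming PROBE_INTERVAL is
   4096 and %ecx holds the final SP value LAST_ADDR):

       .LPSRL0:
               cmpl    %ecx, %esp      # done once SP reaches LAST_ADDR
               je      .LPSRE0
               subl    $4096, %esp     # SP = SP - PROBE_INTERVAL
               orl     $0, (%esp)      # probe the newly exposed page
               jmp     .LPSRL0
       .LPSRE0:  */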
10025 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10026 inclusive. These are offsets from the current stack pointer. */
10028 static void
10029 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10031 /* See if we have a constant small number of probes to generate. If so,
10032 that's the easy case. The run-time loop is made up of 7 insns in the
10033 generic case while the compile-time loop is made up of n insns for n #
10034 of intervals. */
10035 if (size <= 7 * PROBE_INTERVAL)
10039 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10040 it exceeds SIZE. If only one probe is needed, this will not
10041 generate any code. Then probe at FIRST + SIZE. */
10042 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10043 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10044 -(first + i)));
10046 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10047 -(first + size)));
10050 /* Otherwise, do the same as above, but in a loop. Note that we must be
10051 extra careful with variables wrapping around because we might be at
10052 the very top (or the very bottom) of the address space and we have
10053 to be able to handle this case properly; in particular, we use an
10054 equality test for the loop condition. */
10057 HOST_WIDE_INT rounded_size, last;
10058 struct scratch_reg sr;
10060 get_scratch_register_on_entry (&sr);
10063 /* Step 1: round SIZE to the previous multiple of the interval. */
10065 rounded_size = size & -PROBE_INTERVAL;
10068 /* Step 2: compute initial and final value of the loop counter. */
10070 /* TEST_OFFSET = FIRST. */
10071 emit_move_insn (sr.reg, GEN_INT (-first));
10073 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10074 last = first + rounded_size;
10077 /* Step 3: the loop
10079 while (TEST_ADDR != LAST_ADDR)
10080 {
10081 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10082 probe at TEST_ADDR
10083 }
10085 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10086 until it is equal to ROUNDED_SIZE. */
10088 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10091 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10092 that SIZE is equal to ROUNDED_SIZE. */
10094 if (size != rounded_size)
10095 emit_stack_probe (plus_constant (Pmode,
10096 gen_rtx_PLUS (Pmode,
10097 stack_pointer_rtx,
10098 sr.reg),
10099 rounded_size - size));
10101 release_scratch_register_on_entry (&sr);
10104 /* Make sure nothing is scheduled before we are done. */
10105 emit_insn (gen_blockage ());
10108 /* Probe a range of stack addresses from REG to END, inclusive. These are
10109 offsets from the current stack pointer. */
10111 static const char *
10112 output_probe_stack_range (rtx reg, rtx end)
10114 static int labelno = 0;
10115 char loop_lab[32], end_lab[32];
10116 rtx xops[3];
10118 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10119 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10121 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10123 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10124 xops[0] = reg;
10125 xops[1] = end;
10126 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10127 fputs ("\tje\t", asm_out_file);
10128 assemble_name_raw (asm_out_file, end_lab);
10129 fputc ('\n', asm_out_file);
10131 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10132 xops[1] = GEN_INT (PROBE_INTERVAL);
10133 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10135 /* Probe at TEST_ADDR. */
10136 xops[0] = stack_pointer_rtx;
10137 xops[1] = reg;
10138 xops[2] = const0_rtx;
10139 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10141 fprintf (asm_out_file, "\tjmp\t");
10142 assemble_name_raw (asm_out_file, loop_lab);
10143 fputc ('\n', asm_out_file);
10145 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
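/* Added note: the loop above parallels output_adjust_stack_and_probe
   but leaves SP untouched.  The scratch register starts at -FIRST
   (see ix86_emit_probe_stack_range) and is decremented by
   PROBE_INTERVAL each iteration, so the "or" probes SP-relative,
   e.g. (illustrative, 32-bit) "orl $0, (%esp,%ecx)" with
   %ecx == -(FIRST + N * PROBE_INTERVAL).  */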
10150 /* Finalize the stack_realign_needed flag, which guides generation of
10151 the prologue/epilogue in the correct form. */
10152 static void
10153 ix86_finalize_stack_realign_flags (void)
10155 /* Check whether stack realignment is really needed after reload,
10156 and store the result in cfun. */
10157 unsigned int incoming_stack_boundary
10158 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10159 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10160 unsigned int stack_realign = (incoming_stack_boundary
10162 ? crtl->max_used_stack_slot_alignment
10163 : crtl->stack_alignment_needed));
10165 if (crtl->stack_realign_finalized)
10167 /* After stack_realign_needed is finalized, we can no longer
10168 change it. */
10169 gcc_assert (crtl->stack_realign_needed == stack_realign);
10173 /* If the only reason for frame_pointer_needed is that we conservatively
10174 assumed stack realignment might be needed, but in the end nothing that
10175 needed the stack alignment had been spilled, clear frame_pointer_needed
10176 and say we don't need stack realignment. */
10177 if (stack_realign
10178 && !crtl->need_drap
10179 && frame_pointer_needed
10180 && crtl->is_leaf
10181 && flag_omit_frame_pointer
10182 && crtl->sp_is_unchanging
10183 && !ix86_current_function_calls_tls_descriptor
10184 && !crtl->accesses_prior_frames
10185 && !cfun->calls_alloca
10186 && !crtl->calls_eh_return
10187 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10188 && !ix86_frame_pointer_required ()
10189 && get_frame_size () == 0
10190 && ix86_nsaved_sseregs () == 0
10191 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10193 HARD_REG_SET set_up_by_prologue, prologue_used;
10196 CLEAR_HARD_REG_SET (prologue_used);
10197 CLEAR_HARD_REG_SET (set_up_by_prologue);
10198 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10199 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10200 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10201 HARD_FRAME_POINTER_REGNUM);
10205 FOR_BB_INSNS (bb, insn)
10206 if (NONDEBUG_INSN_P (insn)
10207 && requires_stack_frame_p (insn, prologue_used,
10208 set_up_by_prologue))
10210 crtl->stack_realign_needed = stack_realign;
10211 crtl->stack_realign_finalized = true;
10216 frame_pointer_needed = false;
10217 stack_realign = false;
10218 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10219 crtl->stack_alignment_needed = incoming_stack_boundary;
10220 crtl->stack_alignment_estimated = incoming_stack_boundary;
10221 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10222 crtl->preferred_stack_boundary = incoming_stack_boundary;
10223 df_finish_pass (true);
10224 df_scan_alloc (NULL);
10226 df_compute_regs_ever_live (true);
10230 crtl->stack_realign_needed = stack_realign;
10231 crtl->stack_realign_finalized = true;
10234 /* Expand the prologue into a bunch of separate insns. */
10236 void
10237 ix86_expand_prologue (void)
10239 struct machine_function *m = cfun->machine;
10242 struct ix86_frame frame;
10243 HOST_WIDE_INT allocate;
10244 bool int_registers_saved;
10245 bool sse_registers_saved;
10247 ix86_finalize_stack_realign_flags ();
10249 /* DRAP should not coexist with stack_realign_fp */
10250 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10252 memset (&m->fs, 0, sizeof (m->fs));
10254 /* Initialize CFA state for before the prologue. */
10255 m->fs.cfa_reg = stack_pointer_rtx;
10256 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10258 /* Track SP offset to the CFA. We continue tracking this after we've
10259 swapped the CFA register away from SP. In the case of re-alignment
10260 this is fudged; we're interested in offsets within the local frame. */
10261 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10262 m->fs.sp_valid = true;
10264 ix86_compute_frame_layout (&frame);
10266 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10268 /* We should have already generated an error for any use of
10269 ms_hook on a nested function. */
10270 gcc_checking_assert (!ix86_static_chain_on_stack);
10272 /* Check whether profiling is active and we are to use the
10273 profiling-before-prologue variant. If so, issue a sorry. */
10274 if (crtl->profile && flag_fentry != 0)
10275 sorry ("ms_hook_prologue attribute isn%'t compatible "
10276 "with -mfentry for 32-bit");
10278 /* In ix86_asm_output_function_label we emitted:
10279 8b ff movl.s %edi,%edi
10280 55 push %ebp
10281 8b ec movl.s %esp,%ebp
10283 This matches the hookable function prologue in Win32 API
10284 functions in Microsoft Windows XP Service Pack 2 and newer.
10285 Wine uses this to enable Windows apps to hook the Win32 API
10286 functions provided by Wine.
10288 What that means is that we've already set up the frame pointer. */
10290 if (frame_pointer_needed
10291 && !(crtl->drap_reg && crtl->stack_realign_needed))
10295 /* We've decided to use the frame pointer already set up.
10296 Describe this to the unwinder by pretending that both
10297 push and mov insns happen right here.
10299 Putting the unwind info here at the end of the ms_hook
10300 is done so that we can make absolutely certain we get
10301 the required byte sequence at the start of the function,
10302 rather than relying on an assembler that can produce
10303 the exact encoding required.
10305 However it does mean (in the unpatched case) that we have
10306 a 1 insn window where the asynchronous unwind info is
10307 incorrect. However, if we placed the unwind info at
10308 its correct location we would have incorrect unwind info
10309 in the patched case. Which is probably all moot since
10310 I don't expect that Wine generates dwarf2 unwind info for the
10311 system libraries that use this feature. */
10313 insn = emit_insn (gen_blockage ());
10315 push = gen_push (hard_frame_pointer_rtx);
10316 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10317 stack_pointer_rtx);
10318 RTX_FRAME_RELATED_P (push) = 1;
10319 RTX_FRAME_RELATED_P (mov) = 1;
10321 RTX_FRAME_RELATED_P (insn) = 1;
10322 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10323 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10325 /* Note that gen_push incremented m->fs.cfa_offset, even
10326 though we didn't emit the push insn here. */
10327 m->fs.cfa_reg = hard_frame_pointer_rtx;
10328 m->fs.fp_offset = m->fs.cfa_offset;
10329 m->fs.fp_valid = true;
10333 /* The frame pointer is not needed so pop %ebp again.
10334 This leaves us with a pristine state. */
10335 emit_insn (gen_pop (hard_frame_pointer_rtx));
10339 /* The first insn of a function that accepts its static chain on the
10340 stack is to push the register that would be filled in by a direct
10341 call. This insn will be skipped by the trampoline. */
10342 else if (ix86_static_chain_on_stack)
10344 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10345 emit_insn (gen_blockage ());
10347 /* We don't want to interpret this push insn as a register save,
10348 only as a stack adjustment. The real copy of the register as
10349 a save will be done later, if needed. */
10350 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10351 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10352 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10353 RTX_FRAME_RELATED_P (insn) = 1;
10356 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10357 DRAP is needed and stack realignment is really needed after reload. */
10358 if (stack_realign_drap)
10360 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10362 /* Only need to push parameter pointer reg if it is caller saved. */
10363 if (!call_used_regs[REGNO (crtl->drap_reg)])
10365 /* Push arg pointer reg */
10366 insn = emit_insn (gen_push (crtl->drap_reg));
10367 RTX_FRAME_RELATED_P (insn) = 1;
10370 /* Grab the argument pointer. */
10371 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10372 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10373 RTX_FRAME_RELATED_P (insn) = 1;
10374 m->fs.cfa_reg = crtl->drap_reg;
10375 m->fs.cfa_offset = 0;
10377 /* Align the stack. */
10378 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10380 GEN_INT (-align_bytes)));
10381 RTX_FRAME_RELATED_P (insn) = 1;
10383 /* Replicate the return address on the stack so that the return
10384 address can be reached via the (argp - 1) slot. This is needed
10385 to implement macro RETURN_ADDR_RTX and intrinsic function
10386 expand_builtin_return_addr etc. */
10387 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10388 t = gen_frame_mem (word_mode, t);
10389 insn = emit_insn (gen_push (t));
10390 RTX_FRAME_RELATED_P (insn) = 1;
10392 /* For the purposes of frame and register save area addressing,
10393 we've started over with a new frame. */
10394 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10395 m->fs.realigned = true;
10398 int_registers_saved = (frame.nregs == 0);
10399 sse_registers_saved = (frame.nsseregs == 0);
10401 if (frame_pointer_needed && !m->fs.fp_valid)
10403 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10404 slower on all targets. Also sdb doesn't like it. */
10405 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10406 RTX_FRAME_RELATED_P (insn) = 1;
10408 /* Push registers now, before setting the frame pointer
10410 if (!int_registers_saved
10412 && !frame.save_regs_using_mov)
10414 ix86_emit_save_regs ();
10415 int_registers_saved = true;
10416 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10419 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10421 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10422 RTX_FRAME_RELATED_P (insn) = 1;
10424 if (m->fs.cfa_reg == stack_pointer_rtx)
10425 m->fs.cfa_reg = hard_frame_pointer_rtx;
10426 m->fs.fp_offset = m->fs.sp_offset;
10427 m->fs.fp_valid = true;
10431 if (!int_registers_saved)
10433 /* If saving registers via PUSH, do so now. */
10434 if (!frame.save_regs_using_mov)
10436 ix86_emit_save_regs ();
10437 int_registers_saved = true;
10438 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10441 /* When using red zone we may start register saving before allocating
10442 the stack frame saving one cycle of the prologue. However, avoid
10443 doing this if we have to probe the stack; at least on x86_64 the
10444 stack probe can turn into a call that clobbers a red zone location. */
10445 else if (ix86_using_red_zone ()
10446 && (! TARGET_STACK_PROBE
10447 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10449 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10450 int_registers_saved = true;
10454 if (stack_realign_fp)
10456 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10457 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10459 /* The computation of the size of the re-aligned stack frame means
10460 that we must allocate the size of the register save area before
10461 performing the actual alignment. Otherwise we cannot guarantee
10462 that there's enough storage above the realignment point. */
10463 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10464 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10465 GEN_INT (m->fs.sp_offset
10466 - frame.sse_reg_save_offset),
10469 /* Align the stack. */
10470 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10472 GEN_INT (-align_bytes)));
10474 /* For the purposes of register save area addressing, the stack
10475 pointer is no longer valid. As for the value of sp_offset,
10476 see ix86_compute_frame_layout, which we need to match in order
10477 to pass verification of stack_pointer_offset at the end. */
10478 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10479 m->fs.sp_valid = false;
10482 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10484 if (flag_stack_usage_info)
10486 /* We start to count from ARG_POINTER. */
10487 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10489 /* If it was realigned, take into account the fake frame. */
10490 if (stack_realign_drap)
10492 if (ix86_static_chain_on_stack)
10493 stack_size += UNITS_PER_WORD;
10495 if (!call_used_regs[REGNO (crtl->drap_reg)])
10496 stack_size += UNITS_PER_WORD;
10498 /* This over-estimates by 1 minimal-stack-alignment-unit but
10499 mitigates that by counting in the new return address slot. */
10500 current_function_dynamic_stack_size
10501 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10504 current_function_static_stack_size = stack_size;
10507 /* On SEH targets with a very large frame size, allocate an area to save
10508 SSE registers (as the very large allocation won't be described). */
10510 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10511 && !sse_registers_saved)
10513 HOST_WIDE_INT sse_size =
10514 frame.sse_reg_save_offset - frame.reg_save_offset;
10516 gcc_assert (int_registers_saved);
10518 /* No need to do stack checking as the area will be immediately
10519 written. */
10520 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10521 GEN_INT (-sse_size), -1,
10522 m->fs.cfa_reg == stack_pointer_rtx);
10523 allocate -= sse_size;
10524 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10525 sse_registers_saved = true;
10528 /* The stack has already been decremented by the instruction calling us
10529 so probe if the size is non-negative to preserve the protection area. */
10530 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10532 /* We expect the registers to be saved when probes are used. */
10533 gcc_assert (int_registers_saved);
10535 if (STACK_CHECK_MOVING_SP)
10537 ix86_adjust_stack_and_probe (allocate);
10542 HOST_WIDE_INT size = allocate;
10544 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10545 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10547 if (TARGET_STACK_PROBE)
10548 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10550 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10556 else if (!ix86_target_stack_probe ()
10557 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10559 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10560 GEN_INT (-allocate), -1,
10561 m->fs.cfa_reg == stack_pointer_rtx);
10565 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10567 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10568 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
10569 bool eax_live = false;
10570 bool r10_live = false;
10573 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10574 if (!TARGET_64BIT_MS_ABI)
10575 eax_live = ix86_eax_live_at_start_p ();
10577 /* Note that SEH directives need to continue tracking the stack
10578 pointer even after the frame pointer has been set up. */
10581 insn = emit_insn (gen_push (eax));
10582 allocate -= UNITS_PER_WORD;
10583 if (sp_is_cfa_reg || TARGET_SEH)
10586 m->fs.cfa_offset += UNITS_PER_WORD;
10587 RTX_FRAME_RELATED_P (insn) = 1;
10593 r10 = gen_rtx_REG (Pmode, R10_REG);
10594 insn = emit_insn (gen_push (r10));
10595 allocate -= UNITS_PER_WORD;
10596 if (sp_is_cfa_reg || TARGET_SEH)
10599 m->fs.cfa_offset += UNITS_PER_WORD;
10600 RTX_FRAME_RELATED_P (insn) = 1;
10604 emit_move_insn (eax, GEN_INT (allocate));
10605 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10607 /* Use the fact that AX still contains ALLOCATE. */
10608 adjust_stack_insn = (Pmode == DImode
10609 ? gen_pro_epilogue_adjust_stack_di_sub
10610 : gen_pro_epilogue_adjust_stack_si_sub);
10612 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10613 stack_pointer_rtx, eax));
10615 if (sp_is_cfa_reg || TARGET_SEH)
10618 m->fs.cfa_offset += allocate;
10619 RTX_FRAME_RELATED_P (insn) = 1;
10620 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10621 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10622 plus_constant (Pmode, stack_pointer_rtx,
10625 m->fs.sp_offset += allocate;
10627 if (r10_live && eax_live)
10629 t = choose_baseaddr (m->fs.sp_offset - allocate);
10630 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
10631 gen_frame_mem (word_mode, t));
10632 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10633 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
10634 gen_frame_mem (word_mode, t));
10636 else if (eax_live || r10_live)
10638 t = choose_baseaddr (m->fs.sp_offset - allocate);
10639 emit_move_insn (gen_rtx_REG (word_mode,
10640 (eax_live ? AX_REG : R10_REG)),
10641 gen_frame_mem (word_mode, t));
10644 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10646 /* If we haven't already set up the frame pointer, do so now. */
10647 if (frame_pointer_needed && !m->fs.fp_valid)
10649 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10650 GEN_INT (frame.stack_pointer_offset
10651 - frame.hard_frame_pointer_offset));
10652 insn = emit_insn (insn);
10653 RTX_FRAME_RELATED_P (insn) = 1;
10654 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10656 if (m->fs.cfa_reg == stack_pointer_rtx)
10657 m->fs.cfa_reg = hard_frame_pointer_rtx;
10658 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10659 m->fs.fp_valid = true;
10662 if (!int_registers_saved)
10663 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10664 if (!sse_registers_saved)
10665 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10667 pic_reg_used = false;
10668 if (pic_offset_table_rtx
10669 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10672 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10674 if (alt_pic_reg_used != INVALID_REGNUM)
10675 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10677 pic_reg_used = true;
10684 if (ix86_cmodel == CM_LARGE_PIC)
10686 rtx label, tmp_reg;
10688 gcc_assert (Pmode == DImode);
10689 label = gen_label_rtx ();
10690 emit_label (label);
10691 LABEL_PRESERVE_P (label) = 1;
10692 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
10693 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10694 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
10695 label));
10696 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10697 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
10698 pic_offset_table_rtx, tmp_reg));
10699 }
10700 else
10701 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10702 }
10703 else
10705 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10706 RTX_FRAME_RELATED_P (insn) = 1;
10707 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10711 /* In the pic_reg_used case, make sure that the got load isn't deleted
10712 when mcount needs it. Blockage to avoid call movement across mcount
10713 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10715 if (crtl->profile && !flag_fentry && pic_reg_used)
10716 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10718 if (crtl->drap_reg && !crtl->stack_realign_needed)
10720 /* vDRAP was set up, but after reload it turns out that stack
10721 realignment isn't necessary; here we emit prologue code to set up
10722 DRAP without the stack-realignment adjustment. */
10723 t = choose_baseaddr (0);
10724 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10727 /* Prevent instructions from being scheduled into the register-save push
10728 sequence when access to the redzone area is done through the frame
10729 pointer. The offset between the frame pointer and the stack pointer is
10730 calculated relative to the value of the stack pointer at the end of the
10731 function prologue, and moving instructions that access the redzone area
10732 via the frame pointer inside the push sequence violates this assumption. */
10733 if (frame_pointer_needed && frame.red_zone_size)
10734 emit_insn (gen_memory_blockage ());
10736 /* Emit cld instruction if stringops are used in the function. */
10737 if (TARGET_CLD && ix86_current_function_needs_cld)
10738 emit_insn (gen_cld ());
10740 /* SEH requires that the prologue end within 256 bytes of the start of
10741 the function. Prevent instruction schedules that would extend that.
10742 Further, prevent alloca modifications to the stack pointer from being
10743 combined with prologue modifications. */
10744 if (TARGET_SEH)
10745 emit_insn (gen_prologue_use (stack_pointer_rtx));
10748 /* Emit code to restore REG using a POP insn. */
10750 static void
10751 ix86_emit_restore_reg_using_pop (rtx reg)
10753 struct machine_function *m = cfun->machine;
10754 rtx insn = emit_insn (gen_pop (reg));
10756 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10757 m->fs.sp_offset -= UNITS_PER_WORD;
10759 if (m->fs.cfa_reg == crtl->drap_reg
10760 && REGNO (reg) == REGNO (crtl->drap_reg))
10762 /* Previously we'd represented the CFA as an expression
10763 like *(%ebp - 8). We've just popped that value from
10764 the stack, which means we need to reset the CFA to
10765 the drap register. This will remain until we restore
10766 the stack pointer. */
10767 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10768 RTX_FRAME_RELATED_P (insn) = 1;
10770 /* This means that the DRAP register is valid for addressing too. */
10771 m->fs.drap_valid = true;
10775 if (m->fs.cfa_reg == stack_pointer_rtx)
10777 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10778 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10779 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10780 RTX_FRAME_RELATED_P (insn) = 1;
10782 m->fs.cfa_offset -= UNITS_PER_WORD;
10785 /* When the frame pointer is the CFA, and we pop it, we are
10786 swapping back to the stack pointer as the CFA. This happens
10787 for stack frames that don't allocate other data, so we assume
10788 the stack pointer is now pointing at the return address, i.e.
10789 the function entry state, which makes the offset 1 word. */
10790 if (reg == hard_frame_pointer_rtx)
10792 m->fs.fp_valid = false;
10793 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10795 m->fs.cfa_reg = stack_pointer_rtx;
10796 m->fs.cfa_offset -= UNITS_PER_WORD;
10798 add_reg_note (insn, REG_CFA_DEF_CFA,
10799 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10800 GEN_INT (m->fs.cfa_offset)));
10801 RTX_FRAME_RELATED_P (insn) = 1;
10806 /* Emit code to restore saved registers using POP insns. */
10808 static void
10809 ix86_emit_restore_regs_using_pop (void)
10811 unsigned int regno;
10813 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10814 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10815 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
10818 /* Emit code and notes for the LEAVE instruction. */
10820 static void
10821 ix86_emit_leave (void)
10823 struct machine_function *m = cfun->machine;
10824 rtx insn = emit_insn (ix86_gen_leave ());
10826 ix86_add_queued_cfa_restore_notes (insn);
10828 gcc_assert (m->fs.fp_valid);
10829 m->fs.sp_valid = true;
10830 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10831 m->fs.fp_valid = false;
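/* Added note: "leave" is equivalent to "mov %ebp, %esp" followed by
   "pop %ebp" (or the 64-bit analogue), so afterwards SP sits one word
   above the slot that held the saved frame pointer, which is why
   sp_offset becomes fp_offset - UNITS_PER_WORD above.  */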
10833 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10835 m->fs.cfa_reg = stack_pointer_rtx;
10836 m->fs.cfa_offset = m->fs.sp_offset;
10838 add_reg_note (insn, REG_CFA_DEF_CFA,
10839 plus_constant (Pmode, stack_pointer_rtx,
10841 RTX_FRAME_RELATED_P (insn) = 1;
10843 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10847 /* Emit code to restore saved registers using MOV insns.
10848 First register is restored from CFA - CFA_OFFSET. */
10849 static void
10850 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10851 bool maybe_eh_return)
10853 struct machine_function *m = cfun->machine;
10854 unsigned int regno;
10856 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10857 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10859 rtx reg = gen_rtx_REG (word_mode, regno);
10862 mem = choose_baseaddr (cfa_offset);
10863 mem = gen_frame_mem (word_mode, mem);
10864 insn = emit_move_insn (reg, mem);
10866 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10868 /* Previously we'd represented the CFA as an expression
10869 like *(%ebp - 8). We've just popped that value from
10870 the stack, which means we need to reset the CFA to
10871 the drap register. This will remain until we restore
10872 the stack pointer. */
10873 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10874 RTX_FRAME_RELATED_P (insn) = 1;
10876 /* This means that the DRAP register is valid for addressing. */
10877 m->fs.drap_valid = true;
10880 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10882 cfa_offset -= UNITS_PER_WORD;
10886 /* Emit code to restore saved registers using MOV insns.
10887 First register is restored from CFA - CFA_OFFSET. */
10888 static void
10889 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10890 bool maybe_eh_return)
10892 unsigned int regno;
10894 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10895 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10897 rtx reg = gen_rtx_REG (V4SFmode, regno);
10900 mem = choose_baseaddr (cfa_offset);
10901 mem = gen_rtx_MEM (V4SFmode, mem);
10902 set_mem_align (mem, 128);
10903 emit_move_insn (reg, mem);
10905 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10911 /* Restore function stack, frame, and registers. */
10913 void
10914 ix86_expand_epilogue (int style)
10916 struct machine_function *m = cfun->machine;
10917 struct machine_frame_state frame_state_save = m->fs;
10918 struct ix86_frame frame;
10919 bool restore_regs_via_mov;
10922 ix86_finalize_stack_realign_flags ();
10923 ix86_compute_frame_layout (&frame);
10925 m->fs.sp_valid = (!frame_pointer_needed
10926 || (crtl->sp_is_unchanging
10927 && !stack_realign_fp));
10928 gcc_assert (!m->fs.sp_valid
10929 || m->fs.sp_offset == frame.stack_pointer_offset);
10931 /* The FP must be valid if the frame pointer is present. */
10932 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10933 gcc_assert (!m->fs.fp_valid
10934 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10936 /* We must have *some* valid pointer to the stack frame. */
10937 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10939 /* The DRAP is never valid at this point. */
10940 gcc_assert (!m->fs.drap_valid);
10942 /* See the comment about red zone and frame
10943 pointer usage in ix86_expand_prologue. */
10944 if (frame_pointer_needed && frame.red_zone_size)
10945 emit_insn (gen_memory_blockage ());
10947 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10948 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10950 /* Determine the CFA offset of the end of the red-zone. */
10951 m->fs.red_zone_offset = 0;
10952 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10954 /* The red-zone begins below the return address. */
10955 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10957 /* When the register save area is in the aligned portion of
10958 the stack, determine the maximum runtime displacement that
10959 matches up with the aligned frame. */
10960 if (stack_realign_drap)
10961 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10965 /* Special care must be taken for the normal return case of a function
10966 using eh_return: the eax and edx registers are marked as saved, but
10967 not restored along this path. Adjust the save location to match. */
10968 if (crtl->calls_eh_return && style != 2)
10969 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10971 /* EH_RETURN requires the use of moves to function properly. */
10972 if (crtl->calls_eh_return)
10973 restore_regs_via_mov = true;
10974 /* SEH requires the use of pops to identify the epilogue. */
10975 else if (TARGET_SEH)
10976 restore_regs_via_mov = false;
10977 /* If we're only restoring one register and sp is not valid, then
10978 use a move instruction to restore the register, since it's less
10979 work than reloading sp and popping the register. */
10980 else if (!m->fs.sp_valid && frame.nregs <= 1)
10981 restore_regs_via_mov = true;
10982 else if (TARGET_EPILOGUE_USING_MOVE
10983 && cfun->machine->use_fast_prologue_epilogue
10984 && (frame.nregs > 1
10985 || m->fs.sp_offset != frame.reg_save_offset))
10986 restore_regs_via_mov = true;
10987 else if (frame_pointer_needed
10988 && !frame.nregs
10989 && m->fs.sp_offset != frame.reg_save_offset)
10990 restore_regs_via_mov = true;
10991 else if (frame_pointer_needed
10992 && TARGET_USE_LEAVE
10993 && cfun->machine->use_fast_prologue_epilogue
10994 && frame.nregs == 1)
10995 restore_regs_via_mov = true;
10996 else
10997 restore_regs_via_mov = false;
10999 if (restore_regs_via_mov || frame.nsseregs)
11001 /* Ensure that the entire register save area is addressable via
11002 the stack pointer, if we will restore via sp. */
11003 if (TARGET_64BIT
11004 && m->fs.sp_offset > 0x7fffffff
11005 && !(m->fs.fp_valid || m->fs.drap_valid)
11006 && (frame.nsseregs + frame.nregs) != 0)
11008 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11009 GEN_INT (m->fs.sp_offset
11010 - frame.sse_reg_save_offset),
11012 m->fs.cfa_reg == stack_pointer_rtx);
11016 /* If there are any SSE registers to restore, then we have to do it
11017 via moves, since there's obviously no pop for SSE regs. */
11018 if (frame.nsseregs)
11019 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11022 if (restore_regs_via_mov)
11027 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11029 /* eh_return epilogues need %ecx added to the stack pointer. */
11032 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11034 /* Stack align doesn't work with eh_return. */
11035 gcc_assert (!stack_realign_drap);
11036 /* Neither do regparm nested functions. */
11037 gcc_assert (!ix86_static_chain_on_stack);
11039 if (frame_pointer_needed)
11041 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11042 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11043 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11045 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11046 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11048 /* Note that we use SA as a temporary CFA, as the return
11049 address is at the proper place relative to it. We
11050 pretend this happens at the FP restore insn because
11051 prior to this insn the FP would be stored at the wrong
11052 offset relative to SA, and after this insn we have no
11053 other reasonable register to use for the CFA. We don't
11054 bother resetting the CFA to the SP for the duration of
11055 the return insn. */
11056 add_reg_note (insn, REG_CFA_DEF_CFA,
11057 plus_constant (Pmode, sa, UNITS_PER_WORD));
11058 ix86_add_queued_cfa_restore_notes (insn);
11059 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11060 RTX_FRAME_RELATED_P (insn) = 1;
11062 m->fs.cfa_reg = sa;
11063 m->fs.cfa_offset = UNITS_PER_WORD;
11064 m->fs.fp_valid = false;
11066 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11067 const0_rtx, style, false);
11071 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11072 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11073 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11074 ix86_add_queued_cfa_restore_notes (insn);
11076 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11077 if (m->fs.cfa_offset != UNITS_PER_WORD)
11079 m->fs.cfa_offset = UNITS_PER_WORD;
11080 add_reg_note (insn, REG_CFA_DEF_CFA,
11081 plus_constant (Pmode, stack_pointer_rtx,
11083 RTX_FRAME_RELATED_P (insn) = 1;
11086 m->fs.sp_offset = UNITS_PER_WORD;
11087 m->fs.sp_valid = true;
11092 /* SEH requires that the function end with (1) a stack adjustment
11093 if necessary, (2) a sequence of pops, and (3) a return or
11094 jump instruction. Prevent insns from the function body from
11095 being scheduled into this sequence. */
11096 if (TARGET_SEH)
11097 {
11098 /* Prevent a catch region from being adjacent to the standard
11099 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11100 several other flags that would be interesting to test are
11101 set up yet. */
11102 if (flag_non_call_exceptions)
11103 emit_insn (gen_nops (const1_rtx));
11104 else
11105 emit_insn (gen_blockage ());
11106 }
11108 /* The first step is to deallocate the stack frame so that we can
11109 pop the registers. Also do it on SEH targets for a very large
11110 frame as the emitted instructions aren't allowed by the ABI in
11111 epilogues. */
11112 if (!m->fs.sp_valid
11113 || (TARGET_SEH
11114 && (m->fs.sp_offset - frame.reg_save_offset
11115 >= SEH_MAX_FRAME_SIZE)))
11117 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11118 GEN_INT (m->fs.fp_offset
11119 - frame.reg_save_offset),
11122 else if (m->fs.sp_offset != frame.reg_save_offset)
11124 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11125 GEN_INT (m->fs.sp_offset
11126 - frame.reg_save_offset),
11128 m->fs.cfa_reg == stack_pointer_rtx);
11131 ix86_emit_restore_regs_using_pop ();
11134 /* If we used a frame pointer and haven't already got rid of it,
11135 then do so now. */
11136 if (m->fs.fp_valid)
11138 /* If the stack pointer is valid and pointing at the frame
11139 pointer store address, then we only need a pop. */
11140 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11141 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11142 /* Leave results in shorter dependency chains on CPUs that are
11143 able to grok it fast. */
11144 else if (TARGET_USE_LEAVE
11145 || optimize_function_for_size_p (cfun)
11146 || !cfun->machine->use_fast_prologue_epilogue)
11147 ix86_emit_leave ();
11150 pro_epilogue_adjust_stack (stack_pointer_rtx,
11151 hard_frame_pointer_rtx,
11152 const0_rtx, style, !using_drap);
11153 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11159 int param_ptr_offset = UNITS_PER_WORD;
11162 gcc_assert (stack_realign_drap);
11164 if (ix86_static_chain_on_stack)
11165 param_ptr_offset += UNITS_PER_WORD;
11166 if (!call_used_regs[REGNO (crtl->drap_reg)])
11167 param_ptr_offset += UNITS_PER_WORD;
11169 insn = emit_insn (gen_rtx_SET
11170 (VOIDmode, stack_pointer_rtx,
11171 gen_rtx_PLUS (Pmode,
11173 GEN_INT (-param_ptr_offset))));
11174 m->fs.cfa_reg = stack_pointer_rtx;
11175 m->fs.cfa_offset = param_ptr_offset;
11176 m->fs.sp_offset = param_ptr_offset;
11177 m->fs.realigned = false;
11179 add_reg_note (insn, REG_CFA_DEF_CFA,
11180 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11181 GEN_INT (param_ptr_offset)));
11182 RTX_FRAME_RELATED_P (insn) = 1;
11184 if (!call_used_regs[REGNO (crtl->drap_reg)])
11185 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11188 /* At this point the stack pointer must be valid, and we must have
11189 restored all of the registers. We may not have deallocated the
11190 entire stack frame. We've delayed this until now because it may
11191 be possible to merge the local stack deallocation with the
11192 deallocation forced by ix86_static_chain_on_stack. */
11193 gcc_assert (m->fs.sp_valid);
11194 gcc_assert (!m->fs.fp_valid);
11195 gcc_assert (!m->fs.realigned);
11196 if (m->fs.sp_offset != UNITS_PER_WORD)
11198 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11199 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11203 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11205 /* Sibcall epilogues don't want a return instruction. */
11208 m->fs = frame_state_save;
11212 if (crtl->args.pops_args && crtl->args.size)
11214 rtx popc = GEN_INT (crtl->args.pops_args);
11216 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11217 address, do an explicit add, and jump indirectly to the caller. */
11219 if (crtl->args.pops_args >= 65536)
11221 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11224 /* There is no "pascal" calling convention in any 64bit ABI. */
11225 gcc_assert (!TARGET_64BIT);
11227 insn = emit_insn (gen_pop (ecx));
11228 m->fs.cfa_offset -= UNITS_PER_WORD;
11229 m->fs.sp_offset -= UNITS_PER_WORD;
11231 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11232 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11233 add_reg_note (insn, REG_CFA_REGISTER,
11234 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11235 RTX_FRAME_RELATED_P (insn) = 1;
11237 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11239 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11242 emit_jump_insn (gen_simple_return_pop_internal (popc));
11245 emit_jump_insn (gen_simple_return_internal ());
11247 /* Restore the state back to the state from the prologue,
11248 so that it's correct for the next epilogue. */
11249 m->fs = frame_state_save;
11252 /* Reset from the function's potential modifications. */
11254 static void
11255 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11256 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11258 if (pic_offset_table_rtx)
11259 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11261 /* Mach-O doesn't support labels at the end of objects, so if
11262 it looks like we might want one, insert a NOP. */
11264 rtx insn = get_last_insn ();
11265 rtx deleted_debug_label = NULL_RTX;
11268 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11270 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11271 notes only, instead set their CODE_LABEL_NUMBER to -1,
11272 otherwise there would be code generation differences
11273 between -g and -g0. */
11274 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11275 deleted_debug_label = insn;
11276 insn = PREV_INSN (insn);
11281 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11282 fputs ("\tnop\n", file);
11283 else if (deleted_debug_label)
11284 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11285 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11286 CODE_LABEL_NUMBER (insn) = -1;
11292 /* Return a scratch register to use in the split stack prologue. The
11293 split stack prologue is used for -fsplit-stack. It consists of the first
11294 instructions in the function, even before the regular prologue.
11295 The scratch register can be any caller-saved register which is not
11296 used for parameters or for the static chain. */
11298 static unsigned int
11299 split_stack_prologue_scratch_regno (void)
11305 bool is_fastcall, is_thiscall;
11308 is_fastcall = (lookup_attribute ("fastcall",
11309 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11311 is_thiscall = (lookup_attribute ("thiscall",
11312 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11314 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11318 if (DECL_STATIC_CHAIN (cfun->decl))
11320 sorry ("-fsplit-stack does not support fastcall with "
11321 "nested function");
11322 return INVALID_REGNUM;
11326 else if (is_thiscall)
11328 if (!DECL_STATIC_CHAIN (cfun->decl))
11332 else if (regparm < 3)
11334 if (!DECL_STATIC_CHAIN (cfun->decl))
11340 sorry ("-fsplit-stack does not support 2 register "
11341 " parameters for a nested function");
11342 return INVALID_REGNUM;
11349 /* FIXME: We could make this work by pushing a register
11350 around the addition and comparison. */
11351 sorry ("-fsplit-stack does not support 3 register parameters");
11352 return INVALID_REGNUM;
11357 /* A SYMBOL_REF for the function which allocates new stackspace for
11360 static GTY(()) rtx split_stack_fn;
11362 /* A SYMBOL_REF for the more stack function when using the large
11365 static GTY(()) rtx split_stack_fn_large;
11367 /* Handle -fsplit-stack. These are the first instructions in the
11368 function, even before the regular prologue. */
11370 void
11371 ix86_expand_split_stack_prologue (void)
11373 struct ix86_frame frame;
11374 HOST_WIDE_INT allocate;
11375 unsigned HOST_WIDE_INT args_size;
11376 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11377 rtx scratch_reg = NULL_RTX;
11378 rtx varargs_label = NULL_RTX;
11381 gcc_assert (flag_split_stack && reload_completed);
11383 ix86_finalize_stack_realign_flags ();
11384 ix86_compute_frame_layout (&frame);
11385 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11387 /* This is the label we will branch to if we have enough stack
11388 space. We expect the basic block reordering pass to reverse this
11389 branch if optimizing, so that we branch in the unlikely case. */
11390 label = gen_label_rtx ();
11392 /* We need to compare the stack pointer minus the frame size with
11393 the stack boundary in the TCB. The stack boundary always gives
11394 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11395 can compare directly. Otherwise we need to do an addition. */
11397 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11398 UNSPEC_STACK_CHECK);
11399 limit = gen_rtx_CONST (Pmode, limit);
11400 limit = gen_rtx_MEM (Pmode, limit);
11401 if (allocate < SPLIT_STACK_AVAILABLE)
11402 current = stack_pointer_rtx;
11405 unsigned int scratch_regno;
11408 /* We need a scratch register to hold the stack pointer minus
11409 the required frame size. Since this is the very start of the
11410 function, the scratch register can be any caller-saved
11411 register which is not used for parameters. */
11412 offset = GEN_INT (- allocate);
11413 scratch_regno = split_stack_prologue_scratch_regno ();
11414 if (scratch_regno == INVALID_REGNUM)
11416 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11417 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11419 /* We don't use ix86_gen_add3 in this case because it will
11420 want to split to lea, but when not optimizing the insn
11421 will not be split after this point. */
11422 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11423 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11428 emit_move_insn (scratch_reg, offset);
11429 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11430 stack_pointer_rtx));
11432 current = scratch_reg;
11435 ix86_expand_branch (GEU, current, limit, label);
11436 jump_insn = get_last_insn ();
11437 JUMP_LABEL (jump_insn) = label;
11439 /* Mark the jump as very likely to be taken. */
11440 add_reg_note (jump_insn, REG_BR_PROB,
11441 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
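/* Added note: REG_BR_PROB_BASE is 10000, so the value attached above is
   10000 - 10000/100 == 9900, i.e. the branch to LABEL is predicted
   taken with ~99% probability.  */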
11443 if (split_stack_fn == NULL_RTX)
11444 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11445 fn = split_stack_fn;
11447 /* Get more stack space. We pass in the desired stack space and the
11448 size of the arguments to copy to the new stack. In 32-bit mode
11449 we push the parameters; __morestack will return on a new stack
11450 anyhow. In 64-bit mode we pass the parameters in r10 and
11451 r11. */
11452 allocate_rtx = GEN_INT (allocate);
11453 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11454 call_fusage = NULL_RTX;
11459 reg10 = gen_rtx_REG (Pmode, R10_REG);
11460 reg11 = gen_rtx_REG (Pmode, R11_REG);
11462 /* If this function uses a static chain, it will be in %r10.
11463 Preserve it across the call to __morestack. */
11464 if (DECL_STATIC_CHAIN (cfun->decl))
11468 rax = gen_rtx_REG (word_mode, AX_REG);
11469 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11470 use_reg (&call_fusage, rax);
11473 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11475 HOST_WIDE_INT argval;
11477 gcc_assert (Pmode == DImode);
11478 /* When using the large model we need to load the address
11479 into a register, and we've run out of registers. So we
11480 switch to a different calling convention, and we call a
11481 different function: __morestack_large. We pass the
11482 argument size in the upper 32 bits of r10 and pass the
11483 frame size in the lower 32 bits. */
11484 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11485 gcc_assert ((args_size & 0xffffffff) == args_size);
11487 if (split_stack_fn_large == NULL_RTX)
11488 split_stack_fn_large =
11489 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11491 if (ix86_cmodel == CM_LARGE_PIC)
11495 label = gen_label_rtx ();
11496 emit_label (label);
11497 LABEL_PRESERVE_P (label) = 1;
11498 emit_insn (gen_set_rip_rex64 (reg10, label));
11499 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11500 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
11501 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11503 x = gen_rtx_CONST (Pmode, x);
11504 emit_move_insn (reg11, x);
11505 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11506 x = gen_const_mem (Pmode, x);
11507 emit_move_insn (reg11, x);
11510 emit_move_insn (reg11, split_stack_fn_large);
11514 argval = ((args_size << 16) << 16) + allocate;
11515 emit_move_insn (reg10, GEN_INT (argval));
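/* Worked example (added): with args_size == 0x20 and allocate == 0x1000,
   both guaranteed by the asserts above to fit in 32 bits, argval is
   (0x20 << 32) + 0x1000 == 0x0000002000001000: the argument size lands
   in the upper half of r10 and the frame size in the lower half.  */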
11519 emit_move_insn (reg10, allocate_rtx);
11520 emit_move_insn (reg11, GEN_INT (args_size));
11521 use_reg (&call_fusage, reg11);
11524 use_reg (&call_fusage, reg10);
11528 emit_insn (gen_push (GEN_INT (args_size)));
11529 emit_insn (gen_push (allocate_rtx));
11531 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11532 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11534 add_function_usage_to (call_insn, call_fusage);
11536 /* In order to make call/return prediction work right, we now need
11537 to execute a return instruction. See
11538 libgcc/config/i386/morestack.S for the details on how this works.
11540 For flow purposes gcc must not see this as a return
11541 instruction--we need control flow to continue at the subsequent
11542 label. Therefore, we use an unspec. */
11543 gcc_assert (crtl->args.pops_args < 65536);
11544 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11546 /* If we are in 64-bit mode and this function uses a static chain,
11547 we saved %r10 in %rax before calling __morestack. */
11548 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11549 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11550 gen_rtx_REG (word_mode, AX_REG));
11552 /* If this function calls va_start, we need to store a pointer to
11553 the arguments on the old stack, because they may not have been
11554 all copied to the new stack. At this point the old stack can be
11555 found at the frame pointer value used by __morestack, because
11556 __morestack has set that up before calling back to us. Here we
11557 store that pointer in a scratch register, and in
11558 ix86_expand_prologue we store the scratch register in a stack
11559 slot. */
11560 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11562 unsigned int scratch_regno;
11566 scratch_regno = split_stack_prologue_scratch_regno ();
11567 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11568 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11570 /* In 64-bit mode, the stack set up by __morestack holds:
11571 saved frame pointer
11572 return address within this function
11573 return address of caller of this function
11575 So we add three words to get to the stack arguments.
11577 In 32-bit mode, the stack holds:
11578 saved frame pointer
11579 return address within this function
11580 first argument to __morestack
11581 second argument to __morestack
11582 return address of caller of this function
11584 So we add five words to get to the stack arguments. */
11586 words = TARGET_64BIT ? 3 : 5;
11587 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11588 gen_rtx_PLUS (Pmode, frame_reg,
11589 GEN_INT (words * UNITS_PER_WORD))));
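/* Added note: given the layouts above, this computes, in 64-bit mode,
   scratch = %rbp + 3 * 8 == %rbp + 24, and in 32-bit mode
   scratch = %ebp + 5 * 4 == %ebp + 20, i.e. the address of the first
   stack argument in either case.  */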
11591 varargs_label = gen_label_rtx ();
11592 emit_jump_insn (gen_jump (varargs_label));
11593 JUMP_LABEL (get_last_insn ()) = varargs_label;
11598 emit_label (label);
11599 LABEL_NUSES (label) = 1;
11601 /* If this function calls va_start, we now have to set the scratch
11602 register for the case where we do not call __morestack. In this
11603 case we need to set it based on the stack pointer. */
11604 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11606 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11607 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11608 GEN_INT (UNITS_PER_WORD))));
11610 emit_label (varargs_label);
11611 LABEL_NUSES (varargs_label) = 1;
11615 /* We may have to tell the dataflow pass that the split stack prologue
11616 is initializing a scratch register. */
11618 static void
11619 ix86_live_on_entry (bitmap regs)
11621 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11623 gcc_assert (flag_split_stack);
11624 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11628 /* Determine whether OP is a suitable SUBREG RTX for an address. */
11630 static bool
11631 ix86_address_subreg_operand (rtx op)
11633 enum machine_mode mode;
11638 mode = GET_MODE (op);
11640 if (GET_MODE_CLASS (mode) != MODE_INT)
11643 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11644 failures when the register is one word out of a two word structure. */
11645 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11648 /* Allow only SUBREGs of non-eliminable hard registers. */
11649 return register_no_elim_operand (op, mode);
11652 /* Extract the parts of an RTL expression that is a valid memory address
11653 for an instruction. Return 0 if the structure of the address is
11654 grossly off. Return -1 if the address contains ASHIFT, so it is not
11655 strictly valid, but is still used for computing the length of the lea instruction. */
11657 static int
11658 ix86_decompose_address (rtx addr, struct ix86_address *out)
11660 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11661 rtx base_reg, index_reg;
11662 HOST_WIDE_INT scale = 1;
11663 rtx scale_rtx = NULL_RTX;
11664 rtx tmp;
11665 int retval = 1;
11666 enum ix86_address_seg seg = SEG_DEFAULT;
11668 /* Allow zero-extended SImode addresses,
11669 they will be emitted with addr32 prefix. */
11670 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11672 if (GET_CODE (addr) == ZERO_EXTEND
11673 && GET_MODE (XEXP (addr, 0)) == SImode)
11675 addr = XEXP (addr, 0);
11676 if (CONST_INT_P (addr))
11679 else if (GET_CODE (addr) == AND
11680 && const_32bit_mask (XEXP (addr, 1), DImode))
11682 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
11683 if (addr == NULL_RTX)
11686 if (CONST_INT_P (addr))
11691 /* Allow SImode subregs of DImode addresses,
11692 they will be emitted with addr32 prefix. */
11693 if (TARGET_64BIT && GET_MODE (addr) == SImode)
11695 if (GET_CODE (addr) == SUBREG
11696 && GET_MODE (SUBREG_REG (addr)) == DImode)
11698 addr = SUBREG_REG (addr);
11699 if (CONST_INT_P (addr))
11706 else if (GET_CODE (addr) == SUBREG)
11708 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11713 else if (GET_CODE (addr) == PLUS)
11715 rtx addends[4], op;
11723 addends[n++] = XEXP (op, 1);
11726 while (GET_CODE (op) == PLUS);
11731 for (i = n; i >= 0; --i)
11734 switch (GET_CODE (op))
11739 index = XEXP (op, 0);
11740 scale_rtx = XEXP (op, 1);
11746 index = XEXP (op, 0);
11747 tmp = XEXP (op, 1);
11748 if (!CONST_INT_P (tmp))
11749 return 0;
11750 scale = INTVAL (tmp);
11751 if ((unsigned HOST_WIDE_INT) scale > 3)
11752 return 0;
11753 scale = 1 << scale;
11758 if (GET_CODE (op) != UNSPEC)
11763 if (XINT (op, 1) == UNSPEC_TP
11764 && TARGET_TLS_DIRECT_SEG_REFS
11765 && seg == SEG_DEFAULT)
11766 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11772 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11799 else if (GET_CODE (addr) == MULT)
11801 index = XEXP (addr, 0); /* index*scale */
11802 scale_rtx = XEXP (addr, 1);
11804 else if (GET_CODE (addr) == ASHIFT)
11806 /* We're called for lea too, which implements ashift on occasion. */
11807 index = XEXP (addr, 0);
11808 tmp = XEXP (addr, 1);
11809 if (!CONST_INT_P (tmp))
11811 scale = INTVAL (tmp);
11812 if ((unsigned HOST_WIDE_INT) scale > 3)
11814 scale = 1 << scale;
11817 else if (CONST_INT_P (addr))
11819 if (!x86_64_immediate_operand (addr, VOIDmode))
11822 /* Constant addresses are sign extended to 64bit, we have to
11823 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11825 && val_signbit_known_set_p (SImode, INTVAL (addr)))
11831 disp = addr; /* displacement */
11837 else if (GET_CODE (index) == SUBREG
11838 && ix86_address_subreg_operand (SUBREG_REG (index)))
11844 /* Address override works only on the (%reg) part of %fs:(%reg). */
11845 if (seg != SEG_DEFAULT
11846 && ((base && GET_MODE (base) != word_mode)
11847 || (index && GET_MODE (index) != word_mode)))
11850 /* Extract the integral value of scale. */
11853 if (!CONST_INT_P (scale_rtx))
11855 scale = INTVAL (scale_rtx);
11858 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11859 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11861 /* Avoid useless 0 displacement. */
11862 if (disp == const0_rtx && (base || index))
11865 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11866 if (base_reg && index_reg && scale == 1
11867 && (index_reg == arg_pointer_rtx
11868 || index_reg == frame_pointer_rtx
11869 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11872 tmp = base, base = index, index = tmp;
11873 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11876 /* Special case: %ebp cannot be encoded as a base without a displacement.
11880 && (base_reg == hard_frame_pointer_rtx
11881 || base_reg == frame_pointer_rtx
11882 || base_reg == arg_pointer_rtx
11883 || (REG_P (base_reg)
11884 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11885 || REGNO (base_reg) == R13_REG))))
11888 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11889 Avoid this by transforming to [%esi+0].
11890 Reload calls address legitimization without cfun defined, so we need
11891 to test cfun for being non-NULL. */
11892 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11893 && base_reg && !index_reg && !disp
11894 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11897 /* Special case: encode reg+reg instead of reg*2. */
11898 if (!base && index && scale == 2)
11899 base = index, base_reg = index_reg, scale = 1;
11901 /* Special case: scaling cannot be encoded without base or displacement. */
11902 if (!base && !disp && index && scale != 1)
11906 out->index = index;
11908 out->scale = scale;
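/* For illustration (a hypothetical, canonical input, not from the
   original sources):

     (plus:SI (plus:SI (mult:SI (reg:SI 1) (const_int 4))
		       (reg:SI 2))
	      (const_int 8))

   decomposes into index = (reg 1), scale = 4, base = (reg 2) and
   disp = (const_int 8), i.e. the 8(%base,%index,4) addressing form.  */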
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer

ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
  struct ix86_address parts;
  int ok = ix86_decompose_address (x, &parts);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)

  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     The following addressing modes are affected:

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have an AMD-K6 machine handy to check this

      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses for local data when -fPIC is in

darwin_local_data_pic (rtx disp)
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
  switch (GET_CODE (x))
      if (GET_CODE (x) == PLUS)
	  if (!CONST_INT_P (XEXP (x, 1)))

      if (TARGET_MACHO && darwin_local_data_pic (x))

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
      if (GET_CODE (x) != SYMBOL_REF)

      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))

      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);

      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)

      if (!standard_sse_constant_p (x))

  /* Otherwise we handle everything else in the move patterns.  */
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
  return !ix86_legitimate_constant_p (mode, x);
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

legitimate_pic_operand_p (rtx x)
  switch (GET_CODE (x))
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);

      return legitimate_pic_address_disp_p (x);
/* Determine if a given CONST RTX is a valid memory displacement

legitimate_pic_address_disp_p (rtx disp)
  /* In 64-bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1)
	      || INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	  if (GET_CODE (op0) == LABEL_REF)
	  if (GET_CODE (op0) == CONST
	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_PCREL)
	  if (GET_CODE (op0) != SYMBOL_REF)

	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (SYMBOL_REF_TLS_MODEL (op0))
	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
	      && ix86_cmodel != CM_LARGE_PIC)

  if (GET_CODE (disp) != CONST)
  disp = XEXP (disp, 0);

      /* It is unsafe to allow PLUS expressions; this limits the allowed
	 distance of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)

  if (GET_CODE (disp) == PLUS)
      if (!CONST_INT_P (XEXP (disp, 1)))
      disp = XEXP (disp, 0);

  if (TARGET_MACHO && darwin_local_data_pic (disp))

  if (GET_CODE (disp) != UNSPEC)

  switch (XINT (disp, 1))
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64-bit mode since it is always 64 bits when used.
	 The ABI also specifies a 32-bit relocation, but we don't produce
	 it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);

    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
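/* For illustration (a sketch, not part of the original sources): a
   typical displacement accepted above is

     (const:SI (unspec:SI [(symbol_ref:SI ("x"))] UNSPEC_GOTOFF))

   i.e. the "x@GOTOFF" form added to the PIC register, while TLS symbol
   references must instead be wrapped in one of the TLS unspecs handled
   by the cases at the end of the function.  */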
/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.  */

ix86_legitimize_reload_address (rtx x,
				enum machine_mode mode ATTRIBUTE_UNUSED,
				int opnum, int type,
				int ind_levels ATTRIBUTE_UNUSED)
  /* Reload can generate:

     (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)

     This RTX is rejected from ix86_legitimate_address_p due to
     non-strictness of base register 97.  Following this rejection,
     reload pushes all three components into separate registers,
     creating an invalid memory address RTX.

     The following code reloads only the invalid part of the
     memory address RTX.  */

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 1))
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 1)))
      bool something_reloaded = false;

      base = XEXP (XEXP (x, 0), 1);
      if (!REG_OK_FOR_BASE_STRICT_P (base))
	  push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
		       BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;

      index = XEXP (x, 1);
      if (!REG_OK_FOR_INDEX_STRICT_P (index))
	  push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
		       INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;

      gcc_assert (something_reloaded);
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will

ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			   rtx addr, bool strict)
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */

  index = parts.index;
  scale = parts.scale;

  /* Validate base register.  */
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
	reg = SUBREG_REG (base);
	/* Base is not a register.  */

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	/* Base is not valid.  */

  /* Validate index register.  */
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
	reg = SUBREG_REG (index);
	/* Index is not a register.  */

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	/* Index is not valid.  */

  /* Index and base should have the same mode.  */
      && GET_MODE (base) != GET_MODE (index))

  /* Validate scale factor.  */
      /* Scale without index.  */

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */

  /* Validate displacement.  */
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  /* Refuse GOTOFF and GOT in 64-bit mode since they are always
	     64 bits when used.  The ABI also specifies 32-bit relocations,
	     but we don't produce them at all and use IP-relative
	     addressing instead.  */
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  /* 64-bit address unspec.  */
	  case UNSPEC_GOTPCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:

	  case UNSPEC_STACK_CHECK:
	    gcc_assert (flag_split_stack);

	    /* Invalid address unspec.  */

      else if (SYMBOLIC_CONST (disp)
	       && MACHOPIC_INDIRECT
	       && !machopic_operand_p (disp)

	  if (TARGET_64BIT && (index || base))
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */

      else if ((!TARGET_MACHO || flag_pic)
	       && ! legitimate_pic_address_disp_p (disp))
	/* Displacement is an invalid pic construct.  */
      else if (MACHO_DYNAMIC_NO_PIC_P
	       && !ix86_legitimate_constant_p (Pmode, disp))
	/* Displacement must be referenced via non_lazy_pointer.  */

      /* This code used to verify that a symbolic pic displacement
	 includes the pic_offset_table_rtx register.

	 While this is a good idea, unfortunately these constructs may
	 be created by "adds using lea" optimization for incorrect

	 This code is nonsensical, but results in addressing the
	 GOT table with pic_offset_table_rtx base.  We can't
	 just refuse it easily, since it gets matched by the
	 "addsi3" pattern, which later gets split to lea in case
	 the output register differs from the input.  While this
	 can be handled by a separate addsi pattern for this case
	 that never results in lea, disabling this test seems to be
	 the easier and correct fix for the crash.  */
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */

  /* Everything looks valid.  */
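/* For illustration (hypothetical operands, not from the original
   sources): the operand of "-4(%ebp,%ecx,4)" passes all of the checks
   above, while a scale of 3 or an index whose mode differs from the
   base's would have been rejected on the way here.  */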
/* Determine if a given RTX is a valid constant address.  */

constant_address_p (rtx x)
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
  static alias_set_type set = -1;
    set = new_alias_set ();
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */
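/* For illustration (a sketch, not part of the original sources): for a
   global symbol "g" the code below builds roughly

     (mem:SI (plus:SI (reg:SI PIC)
		      (const:SI (unspec:SI [(symbol_ref "g")] UNSPEC_GOT))))

   i.e. a load from g@GOT(%ebx), while a local symbol "l" becomes the
   plain sum (plus (reg PIC) (const (unspec [(symbol_ref "l")]
   UNSPEC_GOTOFF))), i.e. the address l@GOTOFF(%ebx).  */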
legitimize_pic_address (rtx orig, rtx reg)
  rtx new_rtx = orig;

  if (TARGET_MACHO && !TARGET_64BIT)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
  else if (TARGET_64BIT
	   && ix86_cmodel != CM_SMALL_PIC
	   && gotoff_operand (addr, Pmode))
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      tmpreg = gen_reg_rtx (Pmode);
	  emit_move_insn (tmpreg, new_rtx);

	  new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
					 tmpreg, 1, OPTAB_DIRECT);
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
  else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

	  emit_move_insn (reg, new_rtx);
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't use @GOTOFF for text labels on VxWorks;
	      see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
	  if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	    return legitimize_dllimport_symbol (addr, true);
	  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
	      && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
	      rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
	      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));

      /* For x64 PE-COFF there is no GOT table.  So we use address
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly; otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses from the GOT table,
	     so skip this.  */
	  emit_insn (gen_movsi (reg, new_rtx));
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	    new_rtx = force_reg (Pmode, new_rtx);
	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	      emit_move_insn (reg, addr);
	    new_rtx = force_reg (Pmode, addr);
      else if (GET_CODE (addr) == CONST)
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	  gcc_assert (GET_CODE (addr) == PLUS);
      if (GET_CODE (addr) == PLUS)
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	      if (reload_in_progress)
		df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	      new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
	      new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
	      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
		  emit_move_insn (reg, new_rtx);
	      if (INTVAL (op1) < -16*1024*1024
		  || INTVAL (op1) >= 16*1024*1024)
		  if (!x86_64_immediate_operand (op1, Pmode))
		    op1 = force_reg (Pmode, op1);
		  new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
	      rtx base = legitimize_pic_address (op0, reg);
	      enum machine_mode mode = GET_MODE (base);
		= legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		  if (INTVAL (new_rtx) < -16*1024*1024
		      || INTVAL (new_rtx) >= 16*1024*1024)
		      if (!x86_64_immediate_operand (new_rtx, mode))
			new_rtx = force_reg (mode, new_rtx);
			= gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);

    tp = copy_to_mode_reg (tp_mode, tp);
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

ix86_tls_get_addr (void)
  if (!ix86_tls_symbol)
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);

  return ix86_tls_symbol;

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

ix86_tls_module_base (void)
  if (!ix86_tls_module_base_symbol)
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;

  return ix86_tls_module_base_symbol;
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  enum machine_mode tp_mode = Pmode;

    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
	    pic = pic_offset_table_rtx;
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));

      if (TARGET_GNU2_TLS)
	    emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (Pmode, true);
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	  rtx caddr = ix86_tls_get_addr ();
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
		(ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
	      insns = get_insns ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
	    pic = pic_offset_table_rtx;
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));

      if (TARGET_GNU2_TLS)
	  rtx tmp = ix86_tls_module_base ();
	    emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (Pmode, true);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
			       gen_rtx_MINUS (Pmode, tmp, tp));
	  rtx caddr = ix86_tls_get_addr ();
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
		(ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
	      insns = get_insns ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);

    case TLS_MODEL_INITIAL_EXEC:
	  if (TARGET_SUN_TLS && !TARGET_X32)
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as the destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (DImode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));

	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and the linker IE->LE relaxation bug.  */
	  type = UNSPEC_GOTNTPOFF;
	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
      else if (!TARGET_ANY_GNU_TLS)
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	type = UNSPEC_INDNTPOFF;

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	  base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  return gen_rtx_PLUS (tp_mode, base, off);
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	  base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));

      gcc_unreachable ();
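/* For illustration (a sketch of the usual x86-64 sequences; the exact
   code depends on the options handled above): the local-exec model
   typically yields

     movq %fs:0, %rax
     leaq x@tpoff(%rax), %rax

   while the initial-exec model loads the offset through the GOT:

     movq x@gottpoff(%rip), %rax
     movq %fs:(%rax), %rax  */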
/* Create or return the unique __imp_DECL dllimport symbol corresponding

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;

get_dllimport_decl (tree decl)
  struct tree_map *h, in;
  const char *prefix;
  size_t namelen, prefixlen;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;

  *loc = h = ggc_alloc_tree_map ();
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
    ? "*__imp_" : "*__imp__";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

legitimize_dllimport_symbol (rtx symbol, bool want_reg)
  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));

  x = DECL_RTL (imp_decl);
    x = force_reg (Pmode, x);
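/* For illustration (a sketch with a hypothetical symbol, not part of
   the original sources): a reference to a dllimported variable "v" is
   rewritten as a load through the import-table slot built above, i.e.
   (mem (symbol_ref "*__imp__v")), whose contents the loader fills in
   with the variable's real address.  */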
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
	return legitimize_dllimport_symbol (x, true);
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
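/* For illustration (a sketch, not part of the original sources): the
   canonicalization above turns (ashift (reg) (const_int 2)) into
   (mult (reg) (const_int 4)), which ix86_decompose_address recognizes
   as an index with scale 4.  */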
  if (GET_CODE (x) == PLUS)
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));

	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);

	    x = gen_rtx_PLUS (Pmode,
			      gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 1), 0)),
			      plus_constant (Pmode, other,
					     INTVAL (constant)));

      if (changed && ix86_legitimate_address_p (mode, x, false))

      if (GET_CODE (XEXP (x, 0)) == MULT)
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);

      if (GET_CODE (XEXP (x, 1)) == MULT)
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);

	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	  x = legitimize_pic_address (x, 0);

      if (changed && ix86_legitimate_address_p (mode, x, false))

      if (REG_P (XEXP (x, 0)))
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	    val = convert_to_mode (Pmode, val, 1);
	  emit_move_insn (temp, val);
	  XEXP (x, 1) = temp;
      else if (REG_P (XEXP (x, 1)))
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	    val = convert_to_mode (Pmode, val, 1);
	  emit_move_insn (temp, val);
	  XEXP (x, 0) = temp;
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

output_pic_addr_const (FILE *file, rtx x, int code)
  switch (GET_CODE (x))
      gcc_assert (flag_pic);
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
	output_addr_const (file, x);
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
	  assemble_name (file, name);
      if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      if (GET_MODE (x) == VOIDmode)
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	/* We can't handle floating point constants;
	   TARGET_PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  output_pic_addr_const (file, XEXP (x, 0), code);
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
	  bool f = i386_asm_output_addr_const_extra (file, x);
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	  fputs ("@GOT", file);
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  fputs ("@tpoff", file);
	case UNSPEC_NTPOFF:
	    fputs ("@tpoff", file);
	    fputs ("@ntpoff", file);
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	case UNSPEC_GOTNTPOFF:
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	    fputs ("@gotntpoff", file);
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	case UNSPEC_MACHOPIC_OFFSET:
	  machopic_output_function_base_name (file);
	  output_operand_lossage ("invalid UNSPEC as operand");
      output_operand_lossage ("invalid expression as operand");
13563 We need to emit DTP-relative relocations. */
13565 static void ATTRIBUTE_UNUSED
13566 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13568 fputs (ASM_LONG, file);
13569 output_addr_const (file, x);
13570 fputs ("@dtpoff", file);
13576 fputs (", 0", file);
13579 gcc_unreachable ();
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

ix86_pic_register_p (rtx x)
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;

/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

ix86_delegitimize_tls_address (rtx orig_x)
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

ix86_delegitimize_address (rtx x)
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_MODE (XEXP (x, 0)) == Pmode
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
      && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
      rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
      x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
      if (MEM_P (orig_x))
	x = replace_equiv_address_nv (orig_x, x);
  if (GET_CODE (x) != CONST
      || GET_CODE (XEXP (x, 0)) != UNSPEC
      || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
      || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
    return ix86_delegitimize_tls_address (orig_x);
  x = XVECEXP (XEXP (x, 0), 0, 0);
  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
      x = simplify_gen_subreg (GET_MODE (orig_x), x,

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
      const_addend = XEXP (x, 1);

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

    return ix86_delegitimize_tls_address (orig_x);

    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
    result = gen_rtx_PLUS (Pmode, reg_addend, result);

      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	   leal (%ebx, %ecx, 4), %ecx
	   movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),

  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
      if (result == NULL_RTX)
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

ix86_find_base_term (rtx x)
  if (GET_CODE (x) != CONST)
  term = XEXP (x, 0);
  if (GET_CODE (term) == PLUS
      && (CONST_INT_P (XEXP (term, 1))
	  || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
    term = XEXP (term, 0);
  if (GET_CODE (term) != UNSPEC
      || (XINT (term, 1) != UNSPEC_GOTPCREL
	  && XINT (term, 1) != UNSPEC_PCREL))
  return XVECEXP (term, 0, 0);

  return ix86_delegitimize_address (x);
put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
		    bool fp, FILE *file)
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
      code = ix86_fp_compare_code_to_integer (code);
    code = reverse_condition (code);
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else if (mode == CCCmode)
	gcc_unreachable ();
      gcc_unreachable ();
      gcc_assert (mode == CCmode || mode == CCCmode);
      gcc_unreachable ();
      /* ??? As above.  */
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = fp ? "nb" : "ae";
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      /* ??? As above.  */
      if (mode == CCmode)
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "ae";
	gcc_unreachable ();
      suffix = fp ? "u" : "p";
      suffix = fp ? "nu" : "np";
      gcc_unreachable ();
  fputs (suffix, file);
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
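/* For illustration (hypothetical operands, not from the original
   sources): with operands[0] = (reg:SI 0), %k0 prints "%eax", %w0
   prints "%ax" and %b0 prints "%al" in AT&T syntax.  */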
print_reg (rtx x, int code, FILE *file)
  unsigned int regno;
  bool duplicated = code == 'd' && TARGET_AVX;

  if (ASSEMBLER_DIALECT == ASM_ATT)
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);

  regno = true_regnum (x);
  gcc_assert (regno != ARG_POINTER_REGNUM
	      && regno != FRAME_POINTER_REGNUM
	      && regno != FLAGS_REG
	      && regno != FPSR_REG
	      && regno != FPCR_REG);

  if (code == 'w' || MMX_REG_P (x))
  else if (code == 'b')
  else if (code == 'k')
  else if (code == 'q')
  else if (code == 'y')
  else if (code == 'h')
  else if (code == 'x')
  else if (code == 't')
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers: "r%d[bwd]".  */
  if (REX_INT_REGNO_P (regno))
      gcc_assert (TARGET_64BIT);
      fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
	  error ("extended registers have no high halves");
	  error ("unsupported operand size for extended register");
      if (STACK_TOP_P (x))
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      reg = hi_reg_name[regno];
      if (regno >= ARRAY_SIZE (qi_reg_name))
      reg = qi_reg_name[regno];
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
      reg = qi_high_reg_name[regno];
      gcc_assert (!duplicated);
      fputs (hi_reg_name[regno] + 1, file);
      gcc_unreachable ();

      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
	fprintf (file, ", %s", reg);
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base

get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
      cfun->machine->some_ld_name = XSTR (x, 0);

static const char *
get_some_local_dynamic_name (void)
  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   @ -- print a segment register of thread base pointer load
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode  */
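/* For illustration (a hypothetical insn template, not from the original
   machine description): "add%z0\t{%2, %0|%0, %2}" would use %z0 to pick
   the "b", "w", "l" or "q" suffix from the size of operand 0, with the
   {att|intel} braces selecting the dialect-specific operand order.  */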
14163 ix86_print_operand (FILE *file, rtx x, int code)
14170 switch (ASSEMBLER_DIALECT)
14177 /* Intel syntax. For absolute addresses, registers should not
14178 be surrounded by braces. */
14182 ix86_print_operand (file, x, 0);
14189 gcc_unreachable ();
14192 ix86_print_operand (file, x, 0);
14196 /* Wrap address in an UNSPEC to declare special handling. */
14198 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14200 output_address (x);
14204 if (ASSEMBLER_DIALECT == ASM_ATT)
14209 if (ASSEMBLER_DIALECT == ASM_ATT)
14214 if (ASSEMBLER_DIALECT == ASM_ATT)
14219 if (ASSEMBLER_DIALECT == ASM_ATT)
14224 if (ASSEMBLER_DIALECT == ASM_ATT)
14229 if (ASSEMBLER_DIALECT == ASM_ATT)
14234 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14235 if (ASSEMBLER_DIALECT != ASM_ATT)
14238 switch (GET_MODE_SIZE (GET_MODE (x)))
14253 output_operand_lossage
14254 ("invalid operand size for operand code 'O'");
14263 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14265 /* Opcodes don't get size suffixes if using Intel opcodes. */
14266 if (ASSEMBLER_DIALECT == ASM_INTEL)
14269 switch (GET_MODE_SIZE (GET_MODE (x)))
14288 output_operand_lossage
14289 ("invalid operand size for operand code 'z'");
14294 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14296 (0, "non-integer operand used with operand code 'z'");
14300 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14301 if (ASSEMBLER_DIALECT == ASM_INTEL)
14304 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14306 switch (GET_MODE_SIZE (GET_MODE (x)))
14309 #ifdef HAVE_AS_IX86_FILDS
14319 #ifdef HAVE_AS_IX86_FILDQ
14322 fputs ("ll", file);
14330 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14332 /* 387 opcodes don't get size suffixes
14333 if the operands are registers. */
14334 if (STACK_REG_P (x))
14337 switch (GET_MODE_SIZE (GET_MODE (x)))
14358 output_operand_lossage
14359 ("invalid operand type used with operand code 'Z'");
14363 output_operand_lossage
14364 ("invalid operand size for operand code 'Z'");
14382 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14384 ix86_print_operand (file, x, 0);
14385 fputs (", ", file);
14390 switch (GET_CODE (x))
14393 fputs ("neq", file);
14396 fputs ("eq", file);
14400 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14404 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14408 fputs ("le", file);
14412 fputs ("lt", file);
14415 fputs ("unord", file);
14418 fputs ("ord", file);
14421 fputs ("ueq", file);
14424 fputs ("nlt", file);
14427 fputs ("nle", file);
14430 fputs ("ule", file);
14433 fputs ("ult", file);
14436 fputs ("une", file);
14439 output_operand_lossage ("operand is not a condition code, "
14440 "invalid operand code 'Y'");
14446 /* Little bit of braindamage here. The SSE compare instructions
14447 use completely different names for the comparisons than the
14448 fp conditional moves do. */
14449 switch (GET_CODE (x))
14454 fputs ("eq_us", file);
14458 fputs ("eq", file);
14463 fputs ("nge", file);
14467 fputs ("lt", file);
14472 fputs ("ngt", file);
14476 fputs ("le", file);
14479 fputs ("unord", file);
14484 fputs ("neq_oq", file);
14488 fputs ("neq", file);
14493 fputs ("ge", file);
14497 fputs ("nlt", file);
14502 fputs ("gt", file);
14506 fputs ("nle", file);
14509 fputs ("ord", file);
14512 output_operand_lossage ("operand is not a condition code, "
14513 "invalid operand code 'D'");
14520 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14521 if (ASSEMBLER_DIALECT == ASM_ATT)
14527 if (!COMPARISON_P (x))
14529 output_operand_lossage ("operand is not a condition code, "
14530 "invalid operand code '%c'", code);
14533 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
14534 code == 'c' || code == 'f',
14535 code == 'F' || code == 'f',
14540 if (!offsettable_memref_p (x))
14542 output_operand_lossage ("operand is not an offsettable memory "
14543 "reference, invalid operand code 'H'");
14546 /* It doesn't actually matter what mode we use here, as we're
14547 only going to use this for printing. */
14548 x = adjust_address_nv (x, DImode, 8);
14552 gcc_assert (CONST_INT_P (x));
14554 if (INTVAL (x) & IX86_HLE_ACQUIRE)
14555 #ifdef HAVE_AS_IX86_HLE
14556 fputs ("xacquire ", file);
14558 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
14560 else if (INTVAL (x) & IX86_HLE_RELEASE)
14561 #ifdef HAVE_AS_IX86_HLE
14562 fputs ("xrelease ", file);
14564 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
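/* Note: xacquire and xrelease are encoded as the legacy 0xf2 (REPNE)
   and 0xf3 (REP) prefixes, which is why the fallbacks above can emit
   the raw prefix byte when the assembler lacks HLE support. */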
14566 /* We do not want to print the value of the operand. */
14570 if (ASSEMBLER_DIALECT == ASM_ATT)
14576 const char *name = get_some_local_dynamic_name ();
14578 output_operand_lossage ("'%%&' used without any "
14579 "local dynamic TLS references");
14581 assemble_name (file, name);
14590 || optimize_function_for_size_p (cfun)
14591 || !TARGET_BRANCH_PREDICTION_HINTS)
14594 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14597 int pred_val = INTVAL (XEXP (x, 0));
14599 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14600 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14602 bool taken = pred_val > REG_BR_PROB_BASE / 2;
14604 = final_forward_branch_p (current_output_insn) == 0;
14606 /* Emit hints only in cases where the default branch prediction
14607 heuristics would fail. */
14608 if (taken != cputaken)
14610 /* We use 3e (DS) prefix for taken branches and
14611 2e (CS) prefix for not taken branches. */
14613 fputs ("ds ; ", file);
14615 fputs ("cs ; ", file);
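/* Illustrative example: a branch the compiler predicts taken, but
   that the CPU's static forward-not-taken heuristic would miss, is
   emitted as "ds ; jne .L3" (hypothetical label). */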
14623 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14629 if (ASSEMBLER_DIALECT == ASM_ATT)
14632 /* The kernel uses a different segment register for performance
14633 reasons; this way a system call does not have to trash the
14634 userspace segment register, which would be expensive. */
14635 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14636 fputs ("fs", file);
14638 fputs ("gs", file);
14642 putc (TARGET_AVX2 ? 'i' : 'f', file);
14646 if (TARGET_64BIT && Pmode != word_mode)
14647 fputs ("addr32 ", file);
14651 output_operand_lossage ("invalid operand code '%c'", code);
14656 print_reg (x, code, file);
14658 else if (MEM_P (x))
14660 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14661 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14662 && GET_MODE (x) != BLKmode)
14665 switch (GET_MODE_SIZE (GET_MODE (x)))
14667 case 1: size = "BYTE"; break;
14668 case 2: size = "WORD"; break;
14669 case 4: size = "DWORD"; break;
14670 case 8: size = "QWORD"; break;
14671 case 12: size = "TBYTE"; break;
14673 if (GET_MODE (x) == XFmode)
14678 case 32: size = "YMMWORD"; break;
14680 gcc_unreachable ();
14683 /* Check for explicit size override (codes 'b', 'w', 'k',
14687 else if (code == 'w')
14689 else if (code == 'k')
14691 else if (code == 'q')
14693 else if (code == 'x')
14696 fputs (size, file);
14697 fputs (" PTR ", file);
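/* E.g. an explicit 'k' override prints "DWORD PTR" regardless of the
   operand's natural mode. */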
14701 /* Avoid (%rip) for call operands. */
14702 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14703 && !CONST_INT_P (x))
14704 output_addr_const (file, x);
14705 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14706 output_operand_lossage ("invalid constraints for operand");
14708 output_address (x);
14711 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14716 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14717 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14719 if (ASSEMBLER_DIALECT == ASM_ATT)
14721 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14723 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
14724 (unsigned long long) (int) l);
14726 fprintf (file, "0x%08x", (unsigned int) l);
14729 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14734 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14735 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14737 if (ASSEMBLER_DIALECT == ASM_ATT)
14739 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14742 /* These float cases don't actually occur as immediate operands. */
14743 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14747 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14748 fputs (dstr, file);
14753 /* We have patterns that allow zero sets of memory, for instance.
14754 In 64-bit mode, we should probably support all 8-byte vectors,
14755 since we can in fact encode that into an immediate. */
14756 if (GET_CODE (x) == CONST_VECTOR)
14758 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14762 if (code != 'P' && code != 'p')
14764 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14766 if (ASSEMBLER_DIALECT == ASM_ATT)
14769 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14770 || GET_CODE (x) == LABEL_REF)
14772 if (ASSEMBLER_DIALECT == ASM_ATT)
14775 fputs ("OFFSET FLAT:", file);
14778 if (CONST_INT_P (x))
14779 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14780 else if (flag_pic || MACHOPIC_INDIRECT)
14781 output_pic_addr_const (file, x, code);
14783 output_addr_const (file, x);
14788 ix86_print_operand_punct_valid_p (unsigned char code)
14790 return (code == '@' || code == '*' || code == '+' || code == '&'
14791 || code == ';' || code == '~' || code == '^');
14794 /* Print a memory operand whose address is ADDR. */
14797 ix86_print_operand_address (FILE *file, rtx addr)
14799 struct ix86_address parts;
14800 rtx base, index, disp;
14806 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14808 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14809 gcc_assert (parts.index == NULL_RTX);
14810 parts.index = XVECEXP (addr, 0, 1);
14811 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14812 addr = XVECEXP (addr, 0, 0);
14815 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
14817 gcc_assert (TARGET_64BIT);
14818 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14822 ok = ix86_decompose_address (addr, &parts);
14827 index = parts.index;
14829 scale = parts.scale;
14837 if (ASSEMBLER_DIALECT == ASM_ATT)
14839 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14842 gcc_unreachable ();
14845 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14846 if (TARGET_64BIT && !base && !index)
14850 if (GET_CODE (disp) == CONST
14851 && GET_CODE (XEXP (disp, 0)) == PLUS
14852 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14853 symbol = XEXP (XEXP (disp, 0), 0);
14855 if (GET_CODE (symbol) == LABEL_REF
14856 || (GET_CODE (symbol) == SYMBOL_REF
14857 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14860 if (!base && !index)
14862 /* A displacement-only address requires special attention. */
14864 if (CONST_INT_P (disp))
14866 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14867 fputs ("ds:", file);
14868 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14871 output_pic_addr_const (file, disp, 0);
14873 output_addr_const (file, disp);
14877 /* Print SImode register names to force addr32 prefix. */
14878 if (SImode_address_operand (addr, VOIDmode))
14880 #ifdef ENABLE_CHECKING
14881 gcc_assert (TARGET_64BIT);
14882 switch (GET_CODE (addr))
14885 gcc_assert (GET_MODE (addr) == SImode);
14886 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
14890 gcc_assert (GET_MODE (addr) == DImode);
14893 gcc_unreachable ();
14896 gcc_assert (!code);
14902 && CONST_INT_P (disp)
14903 && INTVAL (disp) < -16*1024*1024)
14905 /* X32 runs in 64-bit mode, where displacement, DISP, in
14906 address DISP(%r64), is encoded as 32-bit immediate sign-
14907 extended from 32-bit to 64-bit. For -0x40000300(%r64),
14908 address is %r64 + 0xffffffffbffffd00. When %r64 <
14909 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
14910 which is invalid for x32. The correct address is %r64
14911 - 0x40000300 == 0xf7ffdd64. To properly encode
14912 -0x40000300(%r64) for x32, we zero-extend negative
14913 displacement by forcing addr32 prefix which truncates
14914 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14915 zero-extend all negative displacements, including -1(%rsp).
14916 However, for small negative displacements, sign-extension
14917 won't cause overflow. We only zero-extend negative
14918 displacements if they are less than -16*1024*1024, a limit also used
14919 to check legitimate address displacements for PIC. */
14923 if (ASSEMBLER_DIALECT == ASM_ATT)
14928 output_pic_addr_const (file, disp, 0);
14929 else if (GET_CODE (disp) == LABEL_REF)
14930 output_asm_label (disp);
14932 output_addr_const (file, disp);
14937 print_reg (base, code, file);
14941 print_reg (index, vsib ? 0 : code, file);
14942 if (scale != 1 || vsib)
14943 fprintf (file, ",%d", scale);
14949 rtx offset = NULL_RTX;
14953 /* Pull out the offset of a symbol; print any symbol itself. */
14954 if (GET_CODE (disp) == CONST
14955 && GET_CODE (XEXP (disp, 0)) == PLUS
14956 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14958 offset = XEXP (XEXP (disp, 0), 1);
14959 disp = gen_rtx_CONST (VOIDmode,
14960 XEXP (XEXP (disp, 0), 0));
14964 output_pic_addr_const (file, disp, 0);
14965 else if (GET_CODE (disp) == LABEL_REF)
14966 output_asm_label (disp);
14967 else if (CONST_INT_P (disp))
14970 output_addr_const (file, disp);
14976 print_reg (base, code, file);
14979 if (INTVAL (offset) >= 0)
14981 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14985 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14992 print_reg (index, vsib ? 0 : code, file);
14993 if (scale != 1 || vsib)
14994 fprintf (file, "*%d", scale);
15001 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15004 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15008 if (GET_CODE (x) != UNSPEC)
15011 op = XVECEXP (x, 0, 0);
15012 switch (XINT (x, 1))
15014 case UNSPEC_GOTTPOFF:
15015 output_addr_const (file, op);
15016 /* FIXME: This might be @TPOFF in Sun ld. */
15017 fputs ("@gottpoff", file);
15020 output_addr_const (file, op);
15021 fputs ("@tpoff", file);
15023 case UNSPEC_NTPOFF:
15024 output_addr_const (file, op);
15026 fputs ("@tpoff", file);
15028 fputs ("@ntpoff", file);
15030 case UNSPEC_DTPOFF:
15031 output_addr_const (file, op);
15032 fputs ("@dtpoff", file);
15034 case UNSPEC_GOTNTPOFF:
15035 output_addr_const (file, op);
15037 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15038 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15040 fputs ("@gotntpoff", file);
15042 case UNSPEC_INDNTPOFF:
15043 output_addr_const (file, op);
15044 fputs ("@indntpoff", file);
15047 case UNSPEC_MACHOPIC_OFFSET:
15048 output_addr_const (file, op);
15050 machopic_output_function_base_name (file);
15054 case UNSPEC_STACK_CHECK:
15058 gcc_assert (flag_split_stack);
15060 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15061 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15063 gcc_unreachable ();
15066 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15077 /* Split one or more double-mode RTL references into pairs of half-mode
15078 references. The RTL can be REG, offsettable MEM, integer constant, or
15079 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15080 split and "num" is its length. lo_half and hi_half are output arrays
15081 that parallel "operands". */
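/* Usage sketch (hypothetical operands): splitting one DImode value on
   a 32-bit target
     rtx lo[1], hi[1];
     split_double_mode (DImode, &operands[0], 1, lo, hi);
   leaves the low SImode half in lo[0] and the high half in hi[0]
   (byte offset 4 for an offsettable memory operand). */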
15084 split_double_mode (enum machine_mode mode, rtx operands[],
15085 int num, rtx lo_half[], rtx hi_half[])
15087 enum machine_mode half_mode;
15093 half_mode = DImode;
15096 half_mode = SImode;
15099 gcc_unreachable ();
15102 byte = GET_MODE_SIZE (half_mode);
15106 rtx op = operands[num];
15108 /* simplify_subreg refuses to split volatile memory addresses,
15109 but we still have to handle them. */
15112 lo_half[num] = adjust_address (op, half_mode, 0);
15113 hi_half[num] = adjust_address (op, half_mode, byte);
15117 lo_half[num] = simplify_gen_subreg (half_mode, op,
15118 GET_MODE (op) == VOIDmode
15119 ? mode : GET_MODE (op), 0);
15120 hi_half[num] = simplify_gen_subreg (half_mode, op,
15121 GET_MODE (op) == VOIDmode
15122 ? mode : GET_MODE (op), byte);
15127 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15128 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15129 is the expression of the binary operation. The output may either be
15130 emitted here, or returned to the caller, like all output_* functions.
15132 There is no guarantee that the operands are the same mode, as they
15133 might be within FLOAT or FLOAT_EXTEND expressions. */
15135 #ifndef SYSV386_COMPAT
15136 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15137 wants to fix the assemblers because that causes incompatibility
15138 with gcc. No-one wants to fix gcc because that causes
15139 incompatibility with assemblers... You can use the option of
15140 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15141 #define SYSV386_COMPAT 1
15145 output_387_binary_op (rtx insn, rtx *operands)
15147 static char buf[40];
15150 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15152 #ifdef ENABLE_CHECKING
15153 /* Even if we do not want to check the inputs, this documents input
15154 constraints, which helps in understanding the following code. */
15155 if (STACK_REG_P (operands[0])
15156 && ((REG_P (operands[1])
15157 && REGNO (operands[0]) == REGNO (operands[1])
15158 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15159 || (REG_P (operands[2])
15160 && REGNO (operands[0]) == REGNO (operands[2])
15161 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15162 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15165 gcc_assert (is_sse);
15168 switch (GET_CODE (operands[3]))
15171 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15172 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15180 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15181 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15189 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15190 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15198 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15199 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15207 gcc_unreachable ();
15214 strcpy (buf, ssep);
15215 if (GET_MODE (operands[0]) == SFmode)
15216 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15218 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15222 strcpy (buf, ssep + 1);
15223 if (GET_MODE (operands[0]) == SFmode)
15224 strcat (buf, "ss\t{%2, %0|%0, %2}");
15226 strcat (buf, "sd\t{%2, %0|%0, %2}");
15232 switch (GET_CODE (operands[3]))
15236 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15238 rtx temp = operands[2];
15239 operands[2] = operands[1];
15240 operands[1] = temp;
15243 /* We now know operands[0] == operands[1]. */
15245 if (MEM_P (operands[2]))
15251 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15253 if (STACK_TOP_P (operands[0]))
15254 /* How is it that we are storing to a dead operand[2]?
15255 Well, presumably operands[1] is dead too. We can't
15256 store the result to st(0) as st(0) gets popped on this
15257 instruction. Instead store to operands[2] (which I
15258 think has to be st(1)). st(1) will be popped later.
15259 gcc <= 2.8.1 didn't have this check and generated
15260 assembly code that the UnixWare assembler rejected. */
15261 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15263 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15267 if (STACK_TOP_P (operands[0]))
15268 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15270 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15275 if (MEM_P (operands[1]))
15281 if (MEM_P (operands[2]))
15287 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15290 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15291 derived assemblers, confusingly reverse the direction of
15292 the operation for fsub{r} and fdiv{r} when the
15293 destination register is not st(0). The Intel assembler
15294 doesn't have this brain damage. Read !SYSV386_COMPAT to
15295 figure out what the hardware really does. */
15296 if (STACK_TOP_P (operands[0]))
15297 p = "{p\t%0, %2|rp\t%2, %0}";
15299 p = "{rp\t%2, %0|p\t%0, %2}";
15301 if (STACK_TOP_P (operands[0]))
15302 /* As above for fmul/fadd, we can't store to st(0). */
15303 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15305 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15310 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15313 if (STACK_TOP_P (operands[0]))
15314 p = "{rp\t%0, %1|p\t%1, %0}";
15316 p = "{p\t%1, %0|rp\t%0, %1}";
15318 if (STACK_TOP_P (operands[0]))
15319 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15321 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15326 if (STACK_TOP_P (operands[0]))
15328 if (STACK_TOP_P (operands[1]))
15329 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15331 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15334 else if (STACK_TOP_P (operands[1]))
15337 p = "{\t%1, %0|r\t%0, %1}";
15339 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15345 p = "{r\t%2, %0|\t%0, %2}";
15347 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15353 gcc_unreachable ();
15360 /* Check if a 256bit AVX register is referenced inside of EXP. */
15363 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
15367 if (GET_CODE (exp) == SUBREG)
15368 exp = SUBREG_REG (exp);
15371 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
15377 /* Return needed mode for entity in optimize_mode_switching pass. */
15380 ix86_avx_u128_mode_needed (rtx insn)
15386 /* Needed mode is set to AVX_U128_CLEAN if there are
15387 no 256bit modes used in function arguments. */
15388 for (link = CALL_INSN_FUNCTION_USAGE (insn);
15390 link = XEXP (link, 1))
15392 if (GET_CODE (XEXP (link, 0)) == USE)
15394 rtx arg = XEXP (XEXP (link, 0), 0);
15396 if (ix86_check_avx256_register (&arg, NULL))
15397 return AVX_U128_ANY;
15401 return AVX_U128_CLEAN;
15404 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
15405 changes state only when a 256bit register is written to, but we need
15406 to prevent the compiler from moving the optimal insertion point above
15407 an eventual read from a 256bit register. */
15408 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
15409 return AVX_U128_DIRTY;
15411 return AVX_U128_ANY;
15414 /* Return mode that i387 must be switched into
15415 prior to the execution of insn. */
15418 ix86_i387_mode_needed (int entity, rtx insn)
15420 enum attr_i387_cw mode;
15422 /* The mode UNINITIALIZED is used to store the control word after a
15423 function call or ASM pattern. The mode ANY specifies that the function
15424 has no requirements on the control word and makes no changes in the
15425 bits we are interested in. */
15428 || (NONJUMP_INSN_P (insn)
15429 && (asm_noperands (PATTERN (insn)) >= 0
15430 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15431 return I387_CW_UNINITIALIZED;
15433 if (recog_memoized (insn) < 0)
15434 return I387_CW_ANY;
15436 mode = get_attr_i387_cw (insn);
15441 if (mode == I387_CW_TRUNC)
15446 if (mode == I387_CW_FLOOR)
15451 if (mode == I387_CW_CEIL)
15456 if (mode == I387_CW_MASK_PM)
15461 gcc_unreachable ();
15464 return I387_CW_ANY;
15467 /* Return mode that entity must be switched into
15468 prior to the execution of insn. */
15471 ix86_mode_needed (int entity, rtx insn)
15476 return ix86_avx_u128_mode_needed (insn);
15481 return ix86_i387_mode_needed (entity, insn);
15483 gcc_unreachable ();
15488 /* Check if a 256bit AVX register is referenced in stores. */
15491 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
15493 if (ix86_check_avx256_register (&dest, NULL))
15495 bool *used = (bool *) data;
15500 /* Calculate mode of upper 128bit AVX registers after the insn. */
15503 ix86_avx_u128_mode_after (int mode, rtx insn)
15505 rtx pat = PATTERN (insn);
15507 if (vzeroupper_operation (pat, VOIDmode)
15508 || vzeroall_operation (pat, VOIDmode))
15509 return AVX_U128_CLEAN;
15511 /* We know that state is clean after CALL insn if there are no
15512 256bit registers used in the function return register. */
15515 bool avx_reg256_found = false;
15516 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
15517 if (!avx_reg256_found)
15518 return AVX_U128_CLEAN;
15521 /* Otherwise, return current mode. Remember that if insn
15522 references AVX 256bit registers, the mode was already changed
15523 to DIRTY from MODE_NEEDED. */
15527 /* Return the mode that an insn results in. */
15530 ix86_mode_after (int entity, int mode, rtx insn)
15535 return ix86_avx_u128_mode_after (mode, insn);
15542 gcc_unreachable ();
15547 ix86_avx_u128_mode_entry (void)
15551 /* Entry mode is set to AVX_U128_DIRTY if there are
15552 256bit modes used in function arguments. */
15553 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
15554 arg = TREE_CHAIN (arg))
15556 rtx incoming = DECL_INCOMING_RTL (arg);
15558 if (incoming && ix86_check_avx256_register (&incoming, NULL))
15559 return AVX_U128_DIRTY;
15562 return AVX_U128_CLEAN;
15565 /* Return a mode that ENTITY is assumed to be
15566 switched to at function entry. */
15569 ix86_mode_entry (int entity)
15574 return ix86_avx_u128_mode_entry ();
15579 return I387_CW_ANY;
15581 gcc_unreachable ();
15586 ix86_avx_u128_mode_exit (void)
15588 rtx reg = crtl->return_rtx;
15590 /* Exit mode is set to AVX_U128_DIRTY if there are
15591 256bit modes used in the function return register. */
15592 if (reg && ix86_check_avx256_register (&reg, NULL))
15593 return AVX_U128_DIRTY;
15595 return AVX_U128_CLEAN;
15598 /* Return a mode that ENTITY is assumed to be
15599 switched to at function exit. */
15602 ix86_mode_exit (int entity)
15607 return ix86_avx_u128_mode_exit ();
15612 return I387_CW_ANY;
15614 gcc_unreachable ();
15618 /* Output code to initialize control word copies used by trunc?f?i and
15619 rounding patterns. CURRENT_MODE is set to current control word,
15620 while NEW_MODE is set to new control word. */
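/* For reference, the rounding-control field of the x87 control word
   is bits 10-11: 00 = round to nearest, 01 = round down, 10 = round
   up, 11 = truncate; hence the 0x0400, 0x0800 and 0x0c00 constants
   below. Bit 5 (0x0020) masks the precision exception. */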
15623 emit_i387_cw_initialization (int mode)
15625 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15628 enum ix86_stack_slot slot;
15630 rtx reg = gen_reg_rtx (HImode);
15632 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15633 emit_move_insn (reg, copy_rtx (stored_mode));
15635 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15636 || optimize_function_for_size_p (cfun))
15640 case I387_CW_TRUNC:
15641 /* round toward zero (truncate) */
15642 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15643 slot = SLOT_CW_TRUNC;
15646 case I387_CW_FLOOR:
15647 /* round down toward -oo */
15648 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15649 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15650 slot = SLOT_CW_FLOOR;
15654 /* round up toward +oo */
15655 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15656 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15657 slot = SLOT_CW_CEIL;
15660 case I387_CW_MASK_PM:
15661 /* mask precision exception for nearbyint() */
15662 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15663 slot = SLOT_CW_MASK_PM;
15667 gcc_unreachable ();
15674 case I387_CW_TRUNC:
15675 /* round toward zero (truncate) */
15676 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15677 slot = SLOT_CW_TRUNC;
15680 case I387_CW_FLOOR:
15681 /* round down toward -oo */
15682 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15683 slot = SLOT_CW_FLOOR;
15687 /* round up toward +oo */
15688 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15689 slot = SLOT_CW_CEIL;
15692 case I387_CW_MASK_PM:
15693 /* mask precision exception for nearbyint() */
15694 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15695 slot = SLOT_CW_MASK_PM;
15699 gcc_unreachable ();
15703 gcc_assert (slot < MAX_386_STACK_LOCALS);
15705 new_mode = assign_386_stack_local (HImode, slot);
15706 emit_move_insn (new_mode, reg);
15709 /* Emit vzeroupper. */
15712 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
15716 /* Cancel automatic vzeroupper insertion if there are
15717 live call-saved SSE registers at the insertion point. */
15719 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15720 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
15724 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15725 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
15728 emit_insn (gen_avx_vzeroupper ());
15731 /* Generate one or more insns to set ENTITY to MODE. */
15734 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
15739 if (mode == AVX_U128_CLEAN)
15740 ix86_avx_emit_vzeroupper (regs_live);
15746 if (mode != I387_CW_ANY
15747 && mode != I387_CW_UNINITIALIZED)
15748 emit_i387_cw_initialization (mode);
15751 gcc_unreachable ();
15755 /* Output code for INSN to convert a float to a signed int. OPERANDS
15756 are the insn operands. The output may be [HSD]Imode and the input
15757 operand may be [SDX]Fmode. */
15760 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
15762 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15763 int dimode_p = GET_MODE (operands[0]) == DImode;
15764 int round_mode = get_attr_i387_cw (insn);
15766 /* Jump through a hoop or two for DImode, since the hardware has no
15767 non-popping instruction. We used to do this a different way, but
15768 that was somewhat fragile and broke with post-reload splitters. */
15769 if ((dimode_p || fisttp) && !stack_top_dies)
15770 output_asm_insn ("fld\t%y1", operands);
15772 gcc_assert (STACK_TOP_P (operands[1]));
15773 gcc_assert (MEM_P (operands[0]));
15774 gcc_assert (GET_MODE (operands[1]) != TFmode);
15777 output_asm_insn ("fisttp%Z0\t%0", operands);
15780 if (round_mode != I387_CW_ANY)
15781 output_asm_insn ("fldcw\t%3", operands);
15782 if (stack_top_dies || dimode_p)
15783 output_asm_insn ("fistp%Z0\t%0", operands);
15785 output_asm_insn ("fist%Z0\t%0", operands);
15786 if (round_mode != I387_CW_ANY)
15787 output_asm_insn ("fldcw\t%2", operands);
15793 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15794 have the values zero or one, indicates the ffreep insn's operand
15795 from the OPERANDS array. */
15797 static const char *
15798 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15800 if (TARGET_USE_FFREEP)
15801 #ifdef HAVE_AS_IX86_FFREEP
15802 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15805 static char retval[32];
15806 int regno = REGNO (operands[opno]);
15808 gcc_assert (STACK_REGNO_P (regno));
15810 regno -= FIRST_STACK_REG;
15812 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
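/* E.g. for st(1) this emits ASM_SHORT "0xc1df": the little-endian
   encoding of the bytes 0xdf 0xc1, i.e. "ffreep %st(1)". */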
15817 return opno ? "fstp\t%y1" : "fstp\t%y0";
15821 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15822 should be used. UNORDERED_P is true when fucom should be used. */
15825 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
15827 int stack_top_dies;
15828 rtx cmp_op0, cmp_op1;
15829 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15833 cmp_op0 = operands[0];
15834 cmp_op1 = operands[1];
15838 cmp_op0 = operands[1];
15839 cmp_op1 = operands[2];
15844 if (GET_MODE (operands[0]) == SFmode)
15846 return "%vucomiss\t{%1, %0|%0, %1}";
15848 return "%vcomiss\t{%1, %0|%0, %1}";
15851 return "%vucomisd\t{%1, %0|%0, %1}";
15853 return "%vcomisd\t{%1, %0|%0, %1}";
15856 gcc_assert (STACK_TOP_P (cmp_op0));
15858 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15860 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15862 if (stack_top_dies)
15864 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15865 return output_387_ffreep (operands, 1);
15868 return "ftst\n\tfnstsw\t%0";
15871 if (STACK_REG_P (cmp_op1)
15873 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15874 && REGNO (cmp_op1) != FIRST_STACK_REG)
15876 /* If the top of the 387 stack dies, and the other operand
15877 is also a stack register that dies, then this must be a
15878 `fcompp' float compare. */
15882 /* There is no double popping fcomi variant. Fortunately,
15883 eflags is immune from the fstp's cc clobbering. */
15885 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15887 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15888 return output_387_ffreep (operands, 0);
15893 return "fucompp\n\tfnstsw\t%0";
15895 return "fcompp\n\tfnstsw\t%0";
15900 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15902 static const char * const alt[16] =
15904 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15905 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15906 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15907 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15909 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15910 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15914 "fcomi\t{%y1, %0|%0, %y1}",
15915 "fcomip\t{%y1, %0|%0, %y1}",
15916 "fucomi\t{%y1, %0|%0, %y1}",
15917 "fucomip\t{%y1, %0|%0, %y1}",
15928 mask = eflags_p << 3;
15929 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15930 mask |= unordered_p << 1;
15931 mask |= stack_top_dies;
15933 gcc_assert (mask < 16);
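/* E.g. an fcomi-style unordered compare whose stack top dies has
   eflags_p = 1, a non-integer operand, unordered_p = 1 and
   stack_top_dies = 1, so mask = 11 selects
   "fucomip\t{%y1, %0|%0, %y1}" from the table above. */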
15942 ix86_output_addr_vec_elt (FILE *file, int value)
15944 const char *directive = ASM_LONG;
15948 directive = ASM_QUAD;
15950 gcc_assert (!TARGET_64BIT);
15953 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15957 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15959 const char *directive = ASM_LONG;
15962 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15963 directive = ASM_QUAD;
15965 gcc_assert (!TARGET_64BIT);
15967 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15968 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15969 fprintf (file, "%s%s%d-%s%d\n",
15970 directive, LPREFIX, value, LPREFIX, rel);
15971 else if (HAVE_AS_GOTOFF_IN_DATA)
15972 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15974 else if (TARGET_MACHO)
15976 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15977 machopic_output_function_base_name (file);
15982 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15983 GOT_SYMBOL_NAME, LPREFIX, value);
15986 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate for the target. */
15990 ix86_expand_clear (rtx dest)
15994 /* We play register width games, which are only valid after reload. */
15995 gcc_assert (reload_completed);
15997 /* Avoid HImode and its attendant prefix byte. */
15998 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15999 dest = gen_rtx_REG (SImode, REGNO (dest));
16000 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16002 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16003 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
16005 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16006 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
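/* The xor form clobbers the flags register, hence the explicit
   CLOBBER wrapped into a PARALLEL here; the plain "mov $0, reg" form
   used otherwise leaves the flags untouched. */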
16012 /* X is an unchanging MEM. If it is a constant pool reference, return
16013 the constant pool rtx, else NULL. */
16016 maybe_get_pool_constant (rtx x)
16018 x = ix86_delegitimize_address (XEXP (x, 0));
16020 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16021 return get_pool_constant (x);
16027 ix86_expand_move (enum machine_mode mode, rtx operands[])
16030 enum tls_model model;
16035 if (GET_CODE (op1) == SYMBOL_REF)
16037 model = SYMBOL_REF_TLS_MODEL (op1);
16040 op1 = legitimize_tls_address (op1, model, true);
16041 op1 = force_operand (op1, op0);
16044 op1 = convert_to_mode (mode, op1, 1);
16046 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
16047 && SYMBOL_REF_DLLIMPORT_P (op1))
16048 op1 = legitimize_dllimport_symbol (op1, false);
16050 else if (GET_CODE (op1) == CONST
16051 && GET_CODE (XEXP (op1, 0)) == PLUS
16052 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16054 rtx addend = XEXP (XEXP (op1, 0), 1);
16055 rtx symbol = XEXP (XEXP (op1, 0), 0);
16058 model = SYMBOL_REF_TLS_MODEL (symbol);
16060 tmp = legitimize_tls_address (symbol, model, true);
16061 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
16062 && SYMBOL_REF_DLLIMPORT_P (symbol))
16063 tmp = legitimize_dllimport_symbol (symbol, true);
16067 tmp = force_operand (tmp, NULL);
16068 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16069 op0, 1, OPTAB_DIRECT);
16072 op1 = convert_to_mode (mode, tmp, 1);
16076 if ((flag_pic || MACHOPIC_INDIRECT)
16077 && symbolic_operand (op1, mode))
16079 if (TARGET_MACHO && !TARGET_64BIT)
16082 /* dynamic-no-pic */
16083 if (MACHOPIC_INDIRECT)
16085 rtx temp = ((reload_in_progress
16086 || ((op0 && REG_P (op0))
16088 ? op0 : gen_reg_rtx (Pmode));
16089 op1 = machopic_indirect_data_reference (op1, temp);
16091 op1 = machopic_legitimize_pic_address (op1, mode,
16092 temp == op1 ? 0 : temp);
16094 if (op0 != op1 && GET_CODE (op0) != MEM)
16096 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16100 if (GET_CODE (op0) == MEM)
16101 op1 = force_reg (Pmode, op1);
16105 if (GET_CODE (temp) != REG)
16106 temp = gen_reg_rtx (Pmode);
16107 temp = legitimize_pic_address (op1, temp);
16112 /* dynamic-no-pic */
16118 op1 = force_reg (mode, op1);
16119 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16121 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16122 op1 = legitimize_pic_address (op1, reg);
16125 op1 = convert_to_mode (mode, op1, 1);
16132 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16133 || !push_operand (op0, mode))
16135 op1 = force_reg (mode, op1);
16137 if (push_operand (op0, mode)
16138 && ! general_no_elim_operand (op1, mode))
16139 op1 = copy_to_mode_reg (mode, op1);
16141 /* Force large constants in 64bit compilation into a register
16142 to get them CSEed. */
16143 if (can_create_pseudo_p ()
16144 && (mode == DImode) && TARGET_64BIT
16145 && immediate_operand (op1, mode)
16146 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16147 && !register_operand (op0, mode)
16149 op1 = copy_to_mode_reg (mode, op1);
16151 if (can_create_pseudo_p ()
16152 && FLOAT_MODE_P (mode)
16153 && GET_CODE (op1) == CONST_DOUBLE)
16155 /* If we are loading a floating point constant to a register,
16156 force the value to memory now, since we'll get better code
16157 out of the back end. */
16159 op1 = validize_mem (force_const_mem (mode, op1));
16160 if (!register_operand (op0, mode))
16162 rtx temp = gen_reg_rtx (mode);
16163 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16164 emit_move_insn (op0, temp);
16170 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16174 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16176 rtx op0 = operands[0], op1 = operands[1];
16177 unsigned int align = GET_MODE_ALIGNMENT (mode);
16179 /* Force constants other than zero into memory. We do not know how
16180 the instructions used to build constants modify the upper 64 bits
16181 of the register, once we have that information we may be able
16182 to handle some of them more efficiently. */
16183 if (can_create_pseudo_p ()
16184 && register_operand (op0, mode)
16185 && (CONSTANT_P (op1)
16186 || (GET_CODE (op1) == SUBREG
16187 && CONSTANT_P (SUBREG_REG (op1))))
16188 && !standard_sse_constant_p (op1))
16189 op1 = validize_mem (force_const_mem (mode, op1));
16191 /* We need to check memory alignment for SSE mode since an attribute
16192 can make operands unaligned. */
16193 if (can_create_pseudo_p ()
16194 && SSE_REG_MODE_P (mode)
16195 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16196 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16200 /* ix86_expand_vector_move_misalign() does not like constants ... */
16201 if (CONSTANT_P (op1)
16202 || (GET_CODE (op1) == SUBREG
16203 && CONSTANT_P (SUBREG_REG (op1))))
16204 op1 = validize_mem (force_const_mem (mode, op1));
16206 /* ... nor both arguments in memory. */
16207 if (!register_operand (op0, mode)
16208 && !register_operand (op1, mode))
16209 op1 = force_reg (mode, op1);
16211 tmp[0] = op0; tmp[1] = op1;
16212 ix86_expand_vector_move_misalign (mode, tmp);
16216 /* Make operand1 a register if it isn't already. */
16217 if (can_create_pseudo_p ()
16218 && !register_operand (op0, mode)
16219 && !register_operand (op1, mode))
16221 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16225 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16228 /* Split 32-byte AVX unaligned load and store if needed. */
16231 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16234 rtx (*extract) (rtx, rtx, rtx);
16235 rtx (*load_unaligned) (rtx, rtx);
16236 rtx (*store_unaligned) (rtx, rtx);
16237 enum machine_mode mode;
16239 switch (GET_MODE (op0))
16242 gcc_unreachable ();
16244 extract = gen_avx_vextractf128v32qi;
16245 load_unaligned = gen_avx_loaddqu256;
16246 store_unaligned = gen_avx_storedqu256;
16250 extract = gen_avx_vextractf128v8sf;
16251 load_unaligned = gen_avx_loadups256;
16252 store_unaligned = gen_avx_storeups256;
16256 extract = gen_avx_vextractf128v4df;
16257 load_unaligned = gen_avx_loadupd256;
16258 store_unaligned = gen_avx_storeupd256;
16265 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16267 rtx r = gen_reg_rtx (mode);
16268 m = adjust_address (op1, mode, 0);
16269 emit_move_insn (r, m);
16270 m = adjust_address (op1, mode, 16);
16271 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16272 emit_move_insn (op0, r);
16275 emit_insn (load_unaligned (op0, op1));
16277 else if (MEM_P (op0))
16279 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16281 m = adjust_address (op0, mode, 0);
16282 emit_insn (extract (m, op1, const0_rtx));
16283 m = adjust_address (op0, mode, 16);
16284 emit_insn (extract (m, op1, const1_rtx));
16287 emit_insn (store_unaligned (op0, op1));
16290 gcc_unreachable ();
16293 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16294 straight to ix86_expand_vector_move. */
16295 /* Code generation for scalar reg-reg moves of single and double precision data:
16296 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16300 if (x86_sse_partial_reg_dependency == true)
16305 Code generation for scalar loads of double precision data:
16306 if (x86_sse_split_regs == true)
16307 movlpd mem, reg (gas syntax)
16311 Code generation for unaligned packed loads of single precision data
16312 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16313 if (x86_sse_unaligned_move_optimal)
16316 if (x86_sse_partial_reg_dependency == true)
16328 Code generation for unaligned packed loads of double precision data
16329 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16330 if (x86_sse_unaligned_move_optimal)
16333 if (x86_sse_split_regs == true)
16346 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
16354 && GET_MODE_SIZE (mode) == 32)
16356 switch (GET_MODE_CLASS (mode))
16358 case MODE_VECTOR_INT:
16360 op0 = gen_lowpart (V32QImode, op0);
16361 op1 = gen_lowpart (V32QImode, op1);
16364 case MODE_VECTOR_FLOAT:
16365 ix86_avx256_split_vector_move_misalign (op0, op1);
16369 gcc_unreachable ();
16377 /* ??? If we have typed data, then it would appear that using
16378 movdqu is the only way to get unaligned data loaded with integrity intact. */
16380 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16382 op0 = gen_lowpart (V16QImode, op0);
16383 op1 = gen_lowpart (V16QImode, op1);
16384 /* We will eventually emit movups based on insn attributes. */
16385 emit_insn (gen_sse2_loaddqu (op0, op1));
16387 else if (TARGET_SSE2 && mode == V2DFmode)
16392 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16393 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16394 || optimize_function_for_size_p (cfun))
16396 /* We will eventually emit movups based on insn attributes. */
16397 emit_insn (gen_sse2_loadupd (op0, op1));
16401 /* When SSE registers are split into halves, we can avoid
16402 writing to the top half twice. */
16403 if (TARGET_SSE_SPLIT_REGS)
16405 emit_clobber (op0);
16410 /* ??? Not sure about the best option for the Intel chips.
16411 The following would seem to satisfy; the register is
16412 entirely cleared, breaking the dependency chain. We
16413 then store to the upper half, with a dependency depth
16414 of one. A rumor has it that Intel recommends two movsd
16415 followed by an unpacklpd, but this is unconfirmed. And
16416 given that the dependency depth of the unpacklpd would
16417 still be one, I'm not sure why this would be better. */
16418 zero = CONST0_RTX (V2DFmode);
16421 m = adjust_address (op1, DFmode, 0);
16422 emit_insn (gen_sse2_loadlpd (op0, zero, m));
16423 m = adjust_address (op1, DFmode, 8);
16424 emit_insn (gen_sse2_loadhpd (op0, op0, m));
16429 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16430 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16431 || optimize_function_for_size_p (cfun))
16433 op0 = gen_lowpart (V4SFmode, op0);
16434 op1 = gen_lowpart (V4SFmode, op1);
16435 emit_insn (gen_sse_loadups (op0, op1));
16439 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
16440 emit_move_insn (op0, CONST0_RTX (mode));
16442 emit_clobber (op0);
16444 if (mode != V4SFmode)
16445 op0 = gen_lowpart (V4SFmode, op0);
16447 m = adjust_address (op1, V2SFmode, 0);
16448 emit_insn (gen_sse_loadlps (op0, op0, m));
16449 m = adjust_address (op1, V2SFmode, 8);
16450 emit_insn (gen_sse_loadhps (op0, op0, m));
16453 else if (MEM_P (op0))
16455 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16457 op0 = gen_lowpart (V16QImode, op0);
16458 op1 = gen_lowpart (V16QImode, op1);
16459 /* We will eventually emit movups based on insn attributes. */
16460 emit_insn (gen_sse2_storedqu (op0, op1));
16462 else if (TARGET_SSE2 && mode == V2DFmode)
16465 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16466 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16467 || optimize_function_for_size_p (cfun))
16468 /* We will eventually emit movups based on insn attributes. */
16469 emit_insn (gen_sse2_storeupd (op0, op1));
16472 m = adjust_address (op0, DFmode, 0);
16473 emit_insn (gen_sse2_storelpd (m, op1));
16474 m = adjust_address (op0, DFmode, 8);
16475 emit_insn (gen_sse2_storehpd (m, op1));
16480 if (mode != V4SFmode)
16481 op1 = gen_lowpart (V4SFmode, op1);
16484 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16485 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16486 || optimize_function_for_size_p (cfun))
16488 op0 = gen_lowpart (V4SFmode, op0);
16489 emit_insn (gen_sse_storeups (op0, op1));
16493 m = adjust_address (op0, V2SFmode, 0);
16494 emit_insn (gen_sse_storelps (m, op1));
16495 m = adjust_address (op0, V2SFmode, 8);
16496 emit_insn (gen_sse_storehps (m, op1));
16501 gcc_unreachable ();
16504 /* Expand a push in MODE. This is some mode for which we do not support
16505 proper push instructions, at least from the registers that we expect
16506 the value to live in. */
16509 ix86_expand_push (enum machine_mode mode, rtx x)
16513 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
16514 GEN_INT (-GET_MODE_SIZE (mode)),
16515 stack_pointer_rtx, 1, OPTAB_DIRECT);
16516 if (tmp != stack_pointer_rtx)
16517 emit_move_insn (stack_pointer_rtx, tmp);
16519 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
16521 /* When we push an operand onto stack, it has to be aligned at least
16522 at the function argument boundary. However since we don't have
16523 the argument type, we can't determine the actual argument boundary. */
16525 emit_move_insn (tmp, x);
16528 /* Helper function of ix86_fixup_binary_operands to canonicalize
16529 operand order. Returns true if the operands should be swapped. */
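/* E.g. for a commutative PLUS computing "dst = src1 + dst", swapping
   the sources makes src1 match dst, so the insn fits the two-operand
   x86 form "add src2, dst" without an extra move (illustrative). */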
16532 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
16535 rtx dst = operands[0];
16536 rtx src1 = operands[1];
16537 rtx src2 = operands[2];
16539 /* If the operation is not commutative, we can't do anything. */
16540 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
16543 /* Highest priority is that src1 should match dst. */
16544 if (rtx_equal_p (dst, src1))
16546 if (rtx_equal_p (dst, src2))
16549 /* Next highest priority is that immediate constants come second. */
16550 if (immediate_operand (src2, mode))
16552 if (immediate_operand (src1, mode))
16555 /* Lowest priority is that memory references should come second. */
16565 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16566 destination to use for the operation. If different from the true
16567 destination in operands[0], a copy operation will be required. */
16570 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16573 rtx dst = operands[0];
16574 rtx src1 = operands[1];
16575 rtx src2 = operands[2];
16577 /* Canonicalize operand order. */
16578 if (ix86_swap_binary_operands_p (code, mode, operands))
16582 /* It is invalid to swap operands of different modes. */
16583 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
16590 /* Both source operands cannot be in memory. */
16591 if (MEM_P (src1) && MEM_P (src2))
16593 /* Optimization: Only read from memory once. */
16594 if (rtx_equal_p (src1, src2))
16596 src2 = force_reg (mode, src2);
16600 src2 = force_reg (mode, src2);
16603 /* If the destination is memory, and we do not have matching source
16604 operands, do things in registers. */
16605 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16606 dst = gen_reg_rtx (mode);
16608 /* Source 1 cannot be a constant. */
16609 if (CONSTANT_P (src1))
16610 src1 = force_reg (mode, src1);
16612 /* Source 1 cannot be a non-matching memory. */
16613 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16614 src1 = force_reg (mode, src1);
16616 /* Improve address combine. */
16618 && GET_MODE_CLASS (mode) == MODE_INT
16620 src2 = force_reg (mode, src2);
16622 operands[1] = src1;
16623 operands[2] = src2;
16627 /* Similarly, but assume that the destination has already been
16628 set up properly. */
16631 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16632 enum machine_mode mode, rtx operands[])
16634 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16635 gcc_assert (dst == operands[0]);
16638 /* Attempt to expand a binary operator. Make the expansion closer to the
16639 actual machine, than just general_operand, which would allow 3 separate
16640 memory references (one output, two input) in a single insn. */
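/* Sketch: for "a = b + c" with all three operands in memory, the
   fixup below forces the sources into registers, computes the sum
   there, and a final move stores the result back to a. */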
16643 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16646 rtx src1, src2, dst, op, clob;
16648 dst = ix86_fixup_binary_operands (code, mode, operands);
16649 src1 = operands[1];
16650 src2 = operands[2];
16652 /* Emit the instruction. */
16654 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16655 if (reload_in_progress)
16657 /* Reload doesn't know about the flags register, and doesn't know that
16658 it doesn't want to clobber it. We can only do this with PLUS. */
16659 gcc_assert (code == PLUS);
16662 else if (reload_completed
16664 && !rtx_equal_p (dst, src1))
16666 /* This is going to be an LEA; avoid splitting it later. */
16671 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16672 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16675 /* Fix up the destination if needed. */
16676 if (dst != operands[0])
16677 emit_move_insn (operands[0], dst);
16680 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
16681 the given OPERANDS. */
16684 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
16687 rtx op1 = NULL_RTX, op2 = NULL_RTX;
16688 if (GET_CODE (operands[1]) == SUBREG)
16693 else if (GET_CODE (operands[2]) == SUBREG)
16698 /* Optimize (__m128i) d | (__m128i) e and similar code
16699 when d and e are float vectors into a float vector logical
16700 insn. In C/C++, without using intrinsics, there is no other way
16701 to express a vector logical operation on float vectors than
16702 to cast them temporarily to integer vectors. */
16704 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16705 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
16706 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
16707 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
16708 && SUBREG_BYTE (op1) == 0
16709 && (GET_CODE (op2) == CONST_VECTOR
16710 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
16711 && SUBREG_BYTE (op2) == 0))
16712 && can_create_pseudo_p ())
16715 switch (GET_MODE (SUBREG_REG (op1)))
16721 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
16722 if (GET_CODE (op2) == CONST_VECTOR)
16724 op2 = gen_lowpart (GET_MODE (dst), op2);
16725 op2 = force_reg (GET_MODE (dst), op2);
16730 op2 = SUBREG_REG (operands[2]);
16731 if (!nonimmediate_operand (op2, GET_MODE (dst)))
16732 op2 = force_reg (GET_MODE (dst), op2);
16734 op1 = SUBREG_REG (op1);
16735 if (!nonimmediate_operand (op1, GET_MODE (dst)))
16736 op1 = force_reg (GET_MODE (dst), op1);
16737 emit_insn (gen_rtx_SET (VOIDmode, dst,
16738 gen_rtx_fmt_ee (code, GET_MODE (dst),
16740 emit_move_insn (operands[0], gen_lowpart (mode, dst));
16746 if (!nonimmediate_operand (operands[1], mode))
16747 operands[1] = force_reg (mode, operands[1]);
16748 if (!nonimmediate_operand (operands[2], mode))
16749 operands[2] = force_reg (mode, operands[2]);
16750 ix86_fixup_binary_operands_no_copy (code, mode, operands);
16751 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16752 gen_rtx_fmt_ee (code, mode, operands[1],
16756 /* Return TRUE or FALSE depending on whether the binary operator meets the
16757 appropriate constraints. */
16760 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16763 rtx dst = operands[0];
16764 rtx src1 = operands[1];
16765 rtx src2 = operands[2];
16767 /* Both source operands cannot be in memory. */
16768 if (MEM_P (src1) && MEM_P (src2))
16771 /* Canonicalize operand order for commutative operators. */
16772 if (ix86_swap_binary_operands_p (code, mode, operands))
16779 /* If the destination is memory, we must have a matching source operand. */
16780 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16783 /* Source 1 cannot be a constant. */
16784 if (CONSTANT_P (src1))
16787 /* Source 1 cannot be a non-matching memory. */
16788 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16789 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16790 return (code == AND
16793 || (TARGET_64BIT && mode == DImode))
16794 && satisfies_constraint_L (src2));
16799 /* Attempt to expand a unary operator. Make the expansion closer to the
16800 actual machine, than just general_operand, which would allow 2 separate
16801 memory references (one output, one input) in a single insn. */
16804 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16807 int matching_memory;
16808 rtx src, dst, op, clob;
16813 /* If the destination is memory, and we do not have matching source
16814 operands, do things in registers. */
16815 matching_memory = 0;
16818 if (rtx_equal_p (dst, src))
16819 matching_memory = 1;
16821 dst = gen_reg_rtx (mode);
16824 /* When source operand is memory, destination must match. */
16825 if (MEM_P (src) && !matching_memory)
16826 src = force_reg (mode, src);
16828 /* Emit the instruction. */
16830 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16831 if (reload_in_progress || code == NOT)
16833 /* Reload doesn't know about the flags register, and doesn't know that
16834 it doesn't want to clobber it. */
16835 gcc_assert (code == NOT);
16840 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16841 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16844 /* Fix up the destination if needed. */
16845 if (dst != operands[0])
16846 emit_move_insn (operands[0], dst);
16849 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16850 divisor are within the range [0-255]. */
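/* Shape of the emitted sequence (illustrative AT&T syntax with
   hypothetical registers and labels):
       mov    %esi, %ecx
       or     %edi, %ecx
       test   $-0x100, %ecx
       je     .Lqimode           # dividend and divisor fit in 8 bits
       <full-width div/idiv>
       jmp    .Lend
   .Lqimode:
       <single divb; AL = quotient, AH = remainder>
   .Lend: */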
16853 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16856 rtx end_label, qimode_label;
16857 rtx insn, div, mod;
16858 rtx scratch, tmp0, tmp1, tmp2;
16859 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16860 rtx (*gen_zero_extend) (rtx, rtx);
16861 rtx (*gen_test_ccno_1) (rtx, rtx);
16866 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16867 gen_test_ccno_1 = gen_testsi_ccno_1;
16868 gen_zero_extend = gen_zero_extendqisi2;
16871 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16872 gen_test_ccno_1 = gen_testdi_ccno_1;
16873 gen_zero_extend = gen_zero_extendqidi2;
16876 gcc_unreachable ();
16879 end_label = gen_label_rtx ();
16880 qimode_label = gen_label_rtx ();
16882 scratch = gen_reg_rtx (mode);
16884 /* Use 8bit unsigned divmod if dividend and divisor are within
16885 the range [0-255]. */
16886 emit_move_insn (scratch, operands[2]);
16887 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16888 scratch, 1, OPTAB_DIRECT);
16889 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16890 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16891 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16892 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16893 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16895 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16896 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16897 JUMP_LABEL (insn) = qimode_label;
16899 /* Generate original signed/unsigned divmod. */
16900 div = gen_divmod4_1 (operands[0], operands[1],
16901 operands[2], operands[3]);
16904 /* Branch to the end. */
16905 emit_jump_insn (gen_jump (end_label));
16908 /* Generate 8bit unsigned divide. */
16909 emit_label (qimode_label);
16910 /* Don't use operands[0] for result of 8bit divide since not all
16911 registers support QImode ZERO_EXTRACT. */
16912 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16913 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16914 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16915 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16919 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16920 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16924 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16925 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16928 /* Extract remainder from AH. */
16929 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16930 if (REG_P (operands[1]))
16931 insn = emit_move_insn (operands[1], tmp1);
16934 /* Need a new scratch register since the old one holds the result of the 8bit divide. */
16936 scratch = gen_reg_rtx (mode);
16937 emit_move_insn (scratch, tmp1);
16938 insn = emit_move_insn (operands[1], scratch);
16940 set_unique_reg_note (insn, REG_EQUAL, mod);
16942 /* Zero extend quotient from AL. */
16943 tmp1 = gen_lowpart (QImode, tmp0);
16944 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16945 set_unique_reg_note (insn, REG_EQUAL, div);
16947 emit_label (end_label);
16950 #define LEA_MAX_STALL (3)
16951 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
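/* I.e. the backward and forward searches below give up after
   LEA_MAX_STALL * 2 = 6 half-cycles (three full cycles). */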
16953 /* Increase given DISTANCE in half-cycles according to
16954 dependencies between PREV and NEXT instructions.
16955 Add 1 half-cycle if there is no dependency and
16956 go to the next cycle if there is some dependency. */
16958 static unsigned int
16959 increase_distance (rtx prev, rtx next, unsigned int distance)
16964 if (!prev || !next)
16965 return distance + (distance & 1) + 2;
16967 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16968 return distance + 1;
16970 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16971 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16972 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
16973 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
16974 return distance + (distance & 1) + 2;
16976 return distance + 1;
16979 /* Check whether instruction INSN defines register number
16980 REGNO1 or REGNO2. */
16983 insn_defines_reg (unsigned int regno1, unsigned int regno2,
16988 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
16989 if (DF_REF_REG_DEF_P (*def_rec)
16990 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16991 && (regno1 == DF_REF_REGNO (*def_rec)
16992 || regno2 == DF_REF_REGNO (*def_rec)))
17000 /* Check whether instruction INSN uses register number
17001 REGNO as part of an address expression. */
17004 insn_uses_reg_mem (unsigned int regno, rtx insn)
17008 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17009 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17015 /* Search backward for non-agu definition of register number REGNO1
17016 or register number REGNO2 in basic block starting from instruction
17017 START up to head of basic block or instruction INSN.
17019 The function puts a true value into the *FOUND var if a definition
17020 was found and false otherwise.
17022 The distance in half-cycles between START and the found instruction
17023 or the head of the BB is added to DISTANCE and returned. */
17026 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17027 rtx insn, int distance,
17028 rtx start, bool *found)
17030 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17038 && distance < LEA_SEARCH_THRESHOLD)
17040 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17042 distance = increase_distance (prev, next, distance);
17043 if (insn_defines_reg (regno1, regno2, prev))
17045 if (recog_memoized (prev) < 0
17046 || get_attr_type (prev) != TYPE_LEA)
17055 if (prev == BB_HEAD (bb))
17058 prev = PREV_INSN (prev);
17064 /* Search backward for non-agu definition of register number REGNO1
17065 or register number REGNO2 in INSN's basic block until
17066 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17067 2. Reach a neighbouring BB's boundary, or
17068 3. Reach agu definition.
17069 Returns the distance between the non-agu definition point and INSN.
17070 If no definition point, returns -1. */
17073 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17076 basic_block bb = BLOCK_FOR_INSN (insn);
17078 bool found = false;
17080 if (insn != BB_HEAD (bb))
17081 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17082 distance, PREV_INSN (insn),
17085 if (!found && distance < LEA_SEARCH_THRESHOLD)
17089 bool simple_loop = false;
17091 FOR_EACH_EDGE (e, ei, bb->preds)
17094 simple_loop = true;
17099 distance = distance_non_agu_define_in_bb (regno1, regno2,
17101 BB_END (bb), &found);
17104 int shortest_dist = -1;
17105 bool found_in_bb = false;
17107 FOR_EACH_EDGE (e, ei, bb->preds)
17110 = distance_non_agu_define_in_bb (regno1, regno2,
17116 if (shortest_dist < 0)
17117 shortest_dist = bb_dist;
17118 else if (bb_dist > 0)
17119 shortest_dist = MIN (bb_dist, shortest_dist);
17125 distance = shortest_dist;
17129 /* get_attr_type may modify recog data. We want to make sure
17130 that recog data is valid for instruction INSN, on which
17131 distance_non_agu_define is called. INSN is unchanged here. */
17132 extract_insn_cached (insn);
17137 return distance >> 1;
17140 /* Return the distance in half-cycles between INSN and the next
17141 insn that uses register number REGNO in a memory address, added
17142 to DISTANCE. Return -1 if REGNO is set.
17144 Put true value into *FOUND if register usage was found and
17145 false otherwise.
17146 Put true value into *REDEFINED if register redefinition was
17147 found and false otherwise. */
17150 distance_agu_use_in_bb (unsigned int regno,
17151 rtx insn, int distance, rtx start,
17152 bool *found, bool *redefined)
17154 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17159 *redefined = false;
17163 && distance < LEA_SEARCH_THRESHOLD)
17165 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17167 distance = increase_distance (prev, next, distance);
17168 if (insn_uses_reg_mem (regno, next))
17170 /* Return DISTANCE if OP0 is used in memory
17171 address in NEXT. */
17176 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17178 /* Return -1 if OP0 is set in NEXT. */
17186 if (next == BB_END (bb))
17189 next = NEXT_INSN (next);
17195 /* Return the distance between INSN and the next insn that uses
17196 register number REGNO0 in a memory address. Return -1 if no such
17197 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17200 distance_agu_use (unsigned int regno0, rtx insn)
17202 basic_block bb = BLOCK_FOR_INSN (insn);
17204 bool found = false;
17205 bool redefined = false;
17207 if (insn != BB_END (bb))
17208 distance = distance_agu_use_in_bb (regno0, insn, distance,
17210 &found, &redefined);
17212 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
17216 bool simple_loop = false;
17218 FOR_EACH_EDGE (e, ei, bb->succs)
17221 simple_loop = true;
17226 distance = distance_agu_use_in_bb (regno0, insn,
17227 distance, BB_HEAD (bb),
17228 &found, &redefined);
17231 int shortest_dist = -1;
17232 bool found_in_bb = false;
17233 bool redefined_in_bb = false;
17235 FOR_EACH_EDGE (e, ei, bb->succs)
17238 = distance_agu_use_in_bb (regno0, insn,
17239 distance, BB_HEAD (e->dest),
17240 &found_in_bb, &redefined_in_bb);
17243 if (shortest_dist < 0)
17244 shortest_dist = bb_dist;
17245 else if (bb_dist > 0)
17246 shortest_dist = MIN (bb_dist, shortest_dist);
17252 distance = shortest_dist;
17256 if (!found || redefined)
17259 return distance >> 1;
17262 /* Define this macro to tune LEA priority vs ADD; it takes effect when
17263 there is a dilemma of choosing LEA or ADD.
17264 Negative value: ADD is more preferred than LEA.
17265 Zero: Neutral.
17266 Positive value: LEA is more preferred than ADD. */
17267 #define IX86_LEA_PRIORITY 0
17269 /* Return true if using LEA for INSN has a performance advantage
17270 over the equivalent sequence of instructions. That sequence has
17271 SPLIT_COST cycles higher latency than the LEA itself. */
17274 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
17275 unsigned int regno2, int split_cost, bool has_scale)
17277 int dist_define, dist_use;
17279 /* For Silvermont, if a 2-source or 3-source LEA is used for a
17280 non-destructive destination, or for the ability to use a
17281 SCALE, the use of LEA is justified. */
17282 if (ix86_tune == PROCESSOR_SLM)
17286 if (split_cost < 1)
17288 if (regno0 == regno1 || regno0 == regno2)
17293 dist_define = distance_non_agu_define (regno1, regno2, insn);
17294 dist_use = distance_agu_use (regno0, insn);
17296 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
17298 /* If there is no non-AGU operand definition, no AGU
17299 operand usage and the split cost is 0, then both the lea
17300 and non-lea variants have the same priority. Currently
17301 we prefer lea for 64-bit code and non-lea on 32-bit
17302 code. */
17303 if (dist_use < 0 && split_cost == 0)
17304 return TARGET_64BIT || IX86_LEA_PRIORITY;
17309 /* With a longer definition distance, lea is more preferable.
17310 Here we change it to take into account the splitting cost and
17311 lea priority. */
17312 dist_define += split_cost + IX86_LEA_PRIORITY;
17314 /* If there is no use in a memory address then we just check
17315 that the split cost exceeds the AGU stall. */
17317 return dist_define > LEA_MAX_STALL;
17319 /* If this insn has both a backward non-AGU dependence and a forward
17320 AGU dependence, the one with the shorter distance takes effect. */
17321 return dist_define >= dist_use;
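/* A hypothetical example of the heuristic above: with a non-AGU
   definition one cycle back (dist_define == 1), SPLIT_COST == 1 and an
   AGU use four cycles ahead (dist_use == 4), dist_define becomes
   1 + 1 + IX86_LEA_PRIORITY == 2, and 2 >= 4 is false -- the lea loses,
   because the AGU stall would bite sooner than the extra latency of
   the split sequence.  */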
17324 /* Return true if it is legal to clobber flags by INSN and
17325 false otherwise. */
17328 ix86_ok_to_clobber_flags (rtx insn)
17330 basic_block bb = BLOCK_FOR_INSN (insn);
17336 if (NONDEBUG_INSN_P (insn))
17338 for (use = DF_INSN_USES (insn); *use; use++)
17339 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
17342 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
17346 if (insn == BB_END (bb))
17349 insn = NEXT_INSN (insn);
17352 live = df_get_live_out (bb);
17353 return !REGNO_REG_SET_P (live, FLAGS_REG);
17356 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17357 move and add to avoid AGU stalls. */
17360 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
17362 unsigned int regno0, regno1, regno2;
17364 /* Check if we need to optimize. */
17365 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17368 /* Check that it is correct to split here. */
17369 if (!ix86_ok_to_clobber_flags (insn))
17372 regno0 = true_regnum (operands[0]);
17373 regno1 = true_regnum (operands[1]);
17374 regno2 = true_regnum (operands[2]);
17376 /* We need to split only adds with a non-destructive
17377 destination operand. */
17378 if (regno0 == regno1 || regno0 == regno2)
17381 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
17384 /* Return true if we should emit a lea instruction instead of a mov
17385 instruction. */
17388 ix86_use_lea_for_mov (rtx insn, rtx operands[])
17390 unsigned int regno0, regno1;
17392 /* Check if we need to optimize. */
17393 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17396 /* Use lea for reg to reg moves only. */
17397 if (!REG_P (operands[0]) || !REG_P (operands[1]))
17400 regno0 = true_regnum (operands[0]);
17401 regno1 = true_regnum (operands[1]);
17403 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
17406 /* Return true if we need to split lea into a sequence of
17407 instructions to avoid AGU stalls. */
17410 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
17412 unsigned int regno0, regno1, regno2;
17414 struct ix86_address parts;
17417 /* Check if we need to optimize. */
17418 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17421 /* Check that it is correct to split here. */
17422 if (!ix86_ok_to_clobber_flags (insn))
17425 ok = ix86_decompose_address (operands[1], &parts);
17428 /* There should be at least two components in the address. */
17429 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
17430 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
17433 /* We should not split into an add if a non-legitimate PIC
17434 operand is used as the displacement. */
17435 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
17438 regno0 = true_regnum (operands[0]);
17439 regno1 = INVALID_REGNUM;
17440 regno2 = INVALID_REGNUM;
17443 regno1 = true_regnum (parts.base);
17445 regno2 = true_regnum (parts.index);
17449 /* Compute how many cycles we will add to the execution time
17450 if we split the lea into a sequence of instructions. */
17451 if (parts.base || parts.index)
17453 /* Have to use a mov instruction if the non-destructive
17454 destination form is used. */
17455 if (regno1 != regno0 && regno2 != regno0)
17458 /* Have to add index to base if both exist. */
17459 if (parts.base && parts.index)
17462 /* Have to use shift and adds if scale is 2 or greater. */
17463 if (parts.scale > 1)
17465 if (regno0 != regno1)
17467 else if (regno2 == regno0)
17470 split_cost += parts.scale;
17473 /* Have to use an add instruction with an immediate if
17474 disp is non-zero. */
17475 if (parts.disp && parts.disp != const0_rtx)
17478 /* Subtract the price of lea. */
17482 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
17483 parts.scale > 1);
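/* Worked example (illustrative): for "lea 4(%ebx,%ecx,2), %eax" the
   split needs a mov (non-destructive destination), scaling work for
   scale == 2, an add of the base and an add of the displacement, minus
   the latency of the lea itself; ix86_lea_outperforms then weighs that
   total against the AGU stall distances measured above.  */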
17486 /* Emit x86 binary operator CODE in mode MODE, where the first operand
17487 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
17490 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
17495 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
17496 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17498 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
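/* A minimal usage sketch, with DST and SRC standing for existing
   pseudos (hypothetical names):

     ix86_emit_binop (PLUS, SImode, dst, src);

   emits

     (parallel [(set (reg:SI dst) (plus:SI (reg:SI dst) (reg:SI src)))
                (clobber (reg:CC FLAGS_REG))])

   i.e. the standard two-address x86 add shape.  */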
17501 /* Return true if REGNO1's definition is nearest to the insn. */
17504 find_nearest_reg_def (rtx insn, int regno1, int regno2)
17507 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
17511 while (prev && prev != start)
17513 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
17515 prev = PREV_INSN (prev);
17518 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
17520 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
17522 prev = PREV_INSN (prev);
17525 /* None of the regs is defined in the bb. */
17529 /* Split lea instructions into a sequence of instructions
17530 which are executed on the ALU to avoid AGU stalls.
17531 It is assumed that it is allowed to clobber the flags register
17532 at the lea position. */
17535 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
17537 unsigned int regno0, regno1, regno2;
17538 struct ix86_address parts;
17542 ok = ix86_decompose_address (operands[1], &parts);
17545 target = gen_lowpart (mode, operands[0]);
17547 regno0 = true_regnum (target);
17548 regno1 = INVALID_REGNUM;
17549 regno2 = INVALID_REGNUM;
17553 parts.base = gen_lowpart (mode, parts.base);
17554 regno1 = true_regnum (parts.base);
17559 parts.index = gen_lowpart (mode, parts.index);
17560 regno2 = true_regnum (parts.index);
17564 parts.disp = gen_lowpart (mode, parts.disp);
17566 if (parts.scale > 1)
17568 /* Case r1 = r1 + ... */
17569 if (regno1 == regno0)
17571 /* If we have a case r1 = r1 + C * r1 then we
17572 should use multiplication, which is very
17573 expensive. Assume the cost model is wrong if we
17574 have such a case here. */
17575 gcc_assert (regno2 != regno0);
17577 for (adds = parts.scale; adds > 0; adds--)
17578 ix86_emit_binop (PLUS, mode, target, parts.index);
17582 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17583 if (regno0 != regno2)
17584 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
17586 /* Use shift for scaling. */
17587 ix86_emit_binop (ASHIFT, mode, target,
17588 GEN_INT (exact_log2 (parts.scale)));
17591 ix86_emit_binop (PLUS, mode, target, parts.base);
17593 if (parts.disp && parts.disp != const0_rtx)
17594 ix86_emit_binop (PLUS, mode, target, parts.disp);
17597 else if (!parts.base && !parts.index)
17599 gcc_assert (parts.disp);
17600 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
17606 if (regno0 != regno2)
17607 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
17609 else if (!parts.index)
17611 if (regno0 != regno1)
17612 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
17616 if (regno0 == regno1)
17618 else if (regno0 == regno2)
17624 /* Find the better operand for the SET instruction, depending
17625 on which definition is farther from the insn. */
17626 if (find_nearest_reg_def (insn, regno1, regno2))
17627 tmp = parts.index, tmp1 = parts.base;
17629 tmp = parts.base, tmp1 = parts.index;
17631 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17633 if (parts.disp && parts.disp != const0_rtx)
17634 ix86_emit_binop (PLUS, mode, target, parts.disp);
17636 ix86_emit_binop (PLUS, mode, target, tmp1);
17640 ix86_emit_binop (PLUS, mode, target, tmp);
17643 if (parts.disp && parts.disp != const0_rtx)
17644 ix86_emit_binop (PLUS, mode, target, parts.disp);
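/* Illustrative result of the splitting above for SImode and
   hypothetical registers: "lea 8(%ebx,%ecx,4), %eax" becomes

       movl  %ecx, %eax          # move index into dest
       shll  $2, %eax            # scale by 4 via shift
       addl  %ebx, %eax          # add base
       addl  $8, %eax            # add displacement

   so every step executes on the ALU rather than the AGU.  */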
17648 /* Return true if it is ok to optimize an ADD operation to a LEA
17649 operation to avoid flag register consumption. For most processors,
17650 ADD is faster than LEA. For processors like ATOM, if the
17651 destination register of the LEA holds an actual address which will
17652 be used soon, LEA is better; otherwise ADD is better. */
17655 ix86_lea_for_add_ok (rtx insn, rtx operands[])
17657 unsigned int regno0 = true_regnum (operands[0]);
17658 unsigned int regno1 = true_regnum (operands[1]);
17659 unsigned int regno2 = true_regnum (operands[2]);
17661 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17662 if (regno0 != regno1 && regno0 != regno2)
17665 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17668 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
17671 /* Return true if destination reg of SET_BODY is shift count of
17672 USE_BODY. */
17675 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
17681 /* Retrieve destination of SET_BODY. */
17682 switch (GET_CODE (set_body))
17685 set_dest = SET_DEST (set_body);
17686 if (!set_dest || !REG_P (set_dest))
17690 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
17691 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
17699 /* Retrieve shift count of USE_BODY. */
17700 switch (GET_CODE (use_body))
17703 shift_rtx = XEXP (use_body, 1);
17706 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
17707 if (ix86_dep_by_shift_count_body (set_body,
17708 XVECEXP (use_body, 0, i)))
17716 && (GET_CODE (shift_rtx) == ASHIFT
17717 || GET_CODE (shift_rtx) == LSHIFTRT
17718 || GET_CODE (shift_rtx) == ASHIFTRT
17719 || GET_CODE (shift_rtx) == ROTATE
17720 || GET_CODE (shift_rtx) == ROTATERT))
17722 rtx shift_count = XEXP (shift_rtx, 1);
17724 /* Return true if shift count is dest of SET_BODY. */
17725 if (REG_P (shift_count))
17727 /* Add this check since this can be invoked before register
17728 allocation in pre-reload scheduling. */
17729 if (reload_completed
17730 && true_regnum (set_dest) == true_regnum (shift_count))
17732 else if (REGNO (set_dest) == REGNO (shift_count))
17740 /* Return true if destination reg of SET_INSN is shift count of
17741 USE_INSN. */
17744 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
17746 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
17747 PATTERN (use_insn));
17750 /* Return TRUE or FALSE depending on whether the unary operator meets the
17751 appropriate constraints. */
17754 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
17755 enum machine_mode mode ATTRIBUTE_UNUSED,
17756 rtx operands[2] ATTRIBUTE_UNUSED)
17758 /* If one of the operands is memory, source and destination must match. */
17759 if ((MEM_P (operands[0])
17760 || MEM_P (operands[1]))
17761 && ! rtx_equal_p (operands[0], operands[1]))
17766 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17767 are ok, keeping in mind the possible movddup alternative. */
17770 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
17772 if (MEM_P (operands[0]))
17773 return rtx_equal_p (operands[0], operands[1 + high]);
17774 if (MEM_P (operands[1]) && MEM_P (operands[2]))
17775 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
17779 /* Post-reload splitter for converting an SF or DFmode value in an
17780 SSE register into an unsigned SImode. */
17783 ix86_split_convert_uns_si_sse (rtx operands[])
17785 enum machine_mode vecmode;
17786 rtx value, large, zero_or_two31, input, two31, x;
17788 large = operands[1];
17789 zero_or_two31 = operands[2];
17790 input = operands[3];
17791 two31 = operands[4];
17792 vecmode = GET_MODE (large);
17793 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
17795 /* Load up the value into the low element. We must ensure that the other
17796 elements are valid floats -- zero is the easiest such value. */
17799 if (vecmode == V4SFmode)
17800 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
17802 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
17806 input = gen_rtx_REG (vecmode, REGNO (input));
17807 emit_move_insn (value, CONST0_RTX (vecmode));
17808 if (vecmode == V4SFmode)
17809 emit_insn (gen_sse_movss (value, value, input));
17811 emit_insn (gen_sse2_movsd (value, value, input));
17814 emit_move_insn (large, two31);
17815 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
17817 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
17818 emit_insn (gen_rtx_SET (VOIDmode, large, x));
17820 x = gen_rtx_AND (vecmode, zero_or_two31, large);
17821 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
17823 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
17824 emit_insn (gen_rtx_SET (VOIDmode, value, x));
17826 large = gen_rtx_REG (V4SImode, REGNO (large));
17827 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
17829 x = gen_rtx_REG (V4SImode, REGNO (value));
17830 if (vecmode == V4SFmode)
17831 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
17833 emit_insn (gen_sse2_cvttpd2dq (x, value));
17836 emit_insn (gen_xorv4si3 (value, value, large));
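/* The scalar computation implemented above, for input x in [0, 2^32):

     if (x < 0x1p31)  result = (int) x;
     else             result = (int) (x - 0x1p31) ^ 0x80000000;

   The LE compare builds a mask selecting between 0 and 2^31 for the
   subtraction, and the mask shifted left by 31 supplies the final xor
   that restores the sign bit on the large-value path.  */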
17839 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17840 Expects the 64-bit DImode to be supplied in a pair of integral
17841 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17842 -mfpmath=sse, !optimize_size only. */
17845 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
17847 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
17848 rtx int_xmm, fp_xmm;
17849 rtx biases, exponents;
17852 int_xmm = gen_reg_rtx (V4SImode);
17853 if (TARGET_INTER_UNIT_MOVES)
17854 emit_insn (gen_movdi_to_sse (int_xmm, input));
17855 else if (TARGET_SSE_SPLIT_REGS)
17857 emit_clobber (int_xmm);
17858 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
17862 x = gen_reg_rtx (V2DImode);
17863 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
17864 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
17867 x = gen_rtx_CONST_VECTOR (V4SImode,
17868 gen_rtvec (4, GEN_INT (0x43300000UL),
17869 GEN_INT (0x45300000UL),
17870 const0_rtx, const0_rtx));
17871 exponents = validize_mem (force_const_mem (V4SImode, x));
17873 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17874 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
17876 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17877 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17878 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17879 (0x1.0p84 + double(fp_value_hi_xmm)).
17880 Note these exponents differ by 32. */
17882 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
17884 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17885 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17886 real_ldexp (&bias_lo_rvt, &dconst1, 52);
17887 real_ldexp (&bias_hi_rvt, &dconst1, 84);
17888 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
17889 x = const_double_from_real_value (bias_hi_rvt, DFmode);
17890 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
17891 biases = validize_mem (force_const_mem (V2DFmode, biases));
17892 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
17894 /* Add the upper and lower DFmode values together. */
17896 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
17899 x = copy_to_mode_reg (V2DFmode, fp_xmm);
17900 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
17901 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
17904 ix86_expand_vector_extract (false, target, fp_xmm, 0);
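/* The identity behind the bias trick, with hi:lo the 32-bit halves of
   the unsigned input:

     (0x1.0p52 + (double) lo) - 0x1.0p52            == (double) lo
     (0x1.0p84 + (double) hi * 0x1.0p32) - 0x1.0p84 == (double) hi * 2^32

   Interleaving the exponent words manufactures both biased doubles
   without any int->fp conversion; the subtraction removes the biases
   and the final add yields (double) hi * 2^32 + (double) lo.  */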
17907 /* Not used, but eases macroization of patterns. */
17909 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
17910 rtx input ATTRIBUTE_UNUSED)
17912 gcc_unreachable ();
17915 /* Convert an unsigned SImode value into a DFmode. Only currently used
17916 for SSE, but applicable anywhere. */
17919 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
17921 REAL_VALUE_TYPE TWO31r;
17924 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
17925 NULL, 1, OPTAB_DIRECT);
17927 fp = gen_reg_rtx (DFmode);
17928 emit_insn (gen_floatsidf2 (fp, x));
17930 real_ldexp (&TWO31r, &dconst1, 31);
17931 x = const_double_from_real_value (TWO31r, DFmode);
17933 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
17935 emit_move_insn (target, x);
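/* Worked example: for input 0xF0000000 (4026531840), adding -2^31
   wraps to the signed value 1879048192, which floatsidf2 converts
   exactly; adding 0x1.0p31 back yields 4026531840.0.  This works
   because x - 2^31 always fits in signed SImode.  */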
17938 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17939 32-bit mode; otherwise we have a direct convert instruction. */
17942 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
17944 REAL_VALUE_TYPE TWO32r;
17945 rtx fp_lo, fp_hi, x;
17947 fp_lo = gen_reg_rtx (DFmode);
17948 fp_hi = gen_reg_rtx (DFmode);
17950 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
17952 real_ldexp (&TWO32r, &dconst1, 32);
17953 x = const_double_from_real_value (TWO32r, DFmode);
17954 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
17956 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
17958 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
17961 emit_move_insn (target, x);
17964 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17965 For x86_32, -mfpmath=sse, !optimize_size only. */
17967 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
17969 REAL_VALUE_TYPE ONE16r;
17970 rtx fp_hi, fp_lo, int_hi, int_lo, x;
17972 real_ldexp (&ONE16r, &dconst1, 16);
17973 x = const_double_from_real_value (ONE16r, SFmode);
17974 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
17975 NULL, 0, OPTAB_DIRECT);
17976 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
17977 NULL, 0, OPTAB_DIRECT);
17978 fp_hi = gen_reg_rtx (SFmode);
17979 fp_lo = gen_reg_rtx (SFmode);
17980 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
17981 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
17982 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
17984 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
17986 if (!rtx_equal_p (target, fp_hi))
17987 emit_move_insn (target, fp_hi);
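/* The identity used above, for unsigned x:

     (float) x == (float) (x >> 16) * 0x1.0p16 + (float) (x & 0xffff)

   Each half fits in 16 bits, so both signed int->float conversions are
   exact and the scaling by 2^16 is exact too; the only rounding happens
   in the final addition.  */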
17990 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17991 a vector of unsigned ints VAL to a vector of floats TARGET. */
17994 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
17997 REAL_VALUE_TYPE TWO16r;
17998 enum machine_mode intmode = GET_MODE (val);
17999 enum machine_mode fltmode = GET_MODE (target);
18000 rtx (*cvt) (rtx, rtx);
18002 if (intmode == V4SImode)
18003 cvt = gen_floatv4siv4sf2;
18005 cvt = gen_floatv8siv8sf2;
18006 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18007 tmp[0] = force_reg (intmode, tmp[0]);
18008 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18010 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18011 NULL_RTX, 1, OPTAB_DIRECT);
18012 tmp[3] = gen_reg_rtx (fltmode);
18013 emit_insn (cvt (tmp[3], tmp[1]));
18014 tmp[4] = gen_reg_rtx (fltmode);
18015 emit_insn (cvt (tmp[4], tmp[2]));
18016 real_ldexp (&TWO16r, &dconst1, 16);
18017 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18018 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18019 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18021 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18023 if (tmp[7] != target)
18024 emit_move_insn (target, tmp[7]);
18027 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18028 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18029 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18030 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18033 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18035 REAL_VALUE_TYPE TWO31r;
18036 rtx two31r, tmp[4];
18037 enum machine_mode mode = GET_MODE (val);
18038 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18039 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18040 rtx (*cmp) (rtx, rtx, rtx, rtx);
18043 for (i = 0; i < 3; i++)
18044 tmp[i] = gen_reg_rtx (mode);
18045 real_ldexp (&TWO31r, &dconst1, 31);
18046 two31r = const_double_from_real_value (TWO31r, scalarmode);
18047 two31r = ix86_build_const_vector (mode, 1, two31r);
18048 two31r = force_reg (mode, two31r);
18051 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18052 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18053 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18054 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18055 default: gcc_unreachable ();
18057 tmp[3] = gen_rtx_LE (mode, two31r, val);
18058 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18059 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18061 if (intmode == V4SImode || TARGET_AVX2)
18062 *xorp = expand_simple_binop (intmode, ASHIFT,
18063 gen_lowpart (intmode, tmp[0]),
18064 GEN_INT (31), NULL_RTX, 0,
18068 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18069 two31 = ix86_build_const_vector (intmode, 1, two31);
18070 *xorp = expand_simple_binop (intmode, AND,
18071 gen_lowpart (intmode, tmp[0]),
18072 two31, NULL_RTX, 0,
18075 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18076 0, OPTAB_DIRECT);
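/* Per-element scalar sketch of the adjustment, assuming VAL holds
   values representing unsigned integers:

     if (val < 0x1p31) { xorval = 0;          adjusted = val;          }
     else              { xorval = 0x80000000; adjusted = val - 0x1p31; }

   The caller then applies the signed fix_trunc pattern to ADJUSTED and
   xors the result with *XORP to recover the unsigned answer.  */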
18079 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18080 then replicate the value for all elements of the vector
18081 register. */
18084 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18088 enum machine_mode scalar_mode;
18105 n_elt = GET_MODE_NUNITS (mode);
18106 v = rtvec_alloc (n_elt);
18107 scalar_mode = GET_MODE_INNER (mode);
18109 RTVEC_ELT (v, 0) = value;
18111 for (i = 1; i < n_elt; ++i)
18112 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18114 return gen_rtx_CONST_VECTOR (mode, v);
18117 gcc_unreachable ();
18121 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18122 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18123 for an SSE register. If VECT is true, then replicate the mask for
18124 all elements of the vector register. If INVERT is true, then create
18125 a mask excluding the sign bit. */
18128 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18130 enum machine_mode vec_mode, imode;
18131 HOST_WIDE_INT hi, lo;
18136 /* Find the sign bit, sign extended to 2*HWI. */
18144 mode = GET_MODE_INNER (mode);
18146 lo = 0x80000000, hi = lo < 0;
18154 mode = GET_MODE_INNER (mode);
18156 if (HOST_BITS_PER_WIDE_INT >= 64)
18157 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18159 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18164 vec_mode = VOIDmode;
18165 if (HOST_BITS_PER_WIDE_INT >= 64)
18168 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18175 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18179 lo = ~lo, hi = ~hi;
18185 mask = immed_double_const (lo, hi, imode);
18187 vec = gen_rtvec (2, v, mask);
18188 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
18189 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
18196 gcc_unreachable ();
18200 lo = ~lo, hi = ~hi;
18202 /* Force this value into the low part of a fp vector constant. */
18203 mask = immed_double_const (lo, hi, imode);
18204 mask = gen_lowpart (mode, mask);
18206 if (vec_mode == VOIDmode)
18207 return force_reg (mode, mask);
18209 v = ix86_build_const_vector (vec_mode, vect, mask);
18210 return force_reg (vec_mode, v);
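/* For example, ix86_build_signbit_mask (V4SFmode, true, false) yields
   a vector of four 0x80000000 words (the SFmode sign bit), while
   INVERT == true yields 0x7fffffff words; the inverted mask is ANDed
   in for ABS and the plain mask XORed in for NEG below.  */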
18213 /* Generate code for floating point ABS or NEG. */
18216 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
18219 rtx mask, set, dst, src;
18220 bool use_sse = false;
18221 bool vector_mode = VECTOR_MODE_P (mode);
18222 enum machine_mode vmode = mode;
18226 else if (mode == TFmode)
18228 else if (TARGET_SSE_MATH)
18230 use_sse = SSE_FLOAT_MODE_P (mode);
18231 if (mode == SFmode)
18233 else if (mode == DFmode)
18237 /* NEG and ABS performed with SSE use bitwise mask operations.
18238 Create the appropriate mask now. */
18240 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
18247 set = gen_rtx_fmt_e (code, mode, src);
18248 set = gen_rtx_SET (VOIDmode, dst, set);
18255 use = gen_rtx_USE (VOIDmode, mask);
18257 par = gen_rtvec (2, set, use);
18260 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18261 par = gen_rtvec (3, set, use, clob);
18263 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
18269 /* Expand a copysign operation. Special case operand 0 being a constant. */
18272 ix86_expand_copysign (rtx operands[])
18274 enum machine_mode mode, vmode;
18275 rtx dest, op0, op1, mask, nmask;
18277 dest = operands[0];
18281 mode = GET_MODE (dest);
18283 if (mode == SFmode)
18285 else if (mode == DFmode)
18290 if (GET_CODE (op0) == CONST_DOUBLE)
18292 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
18294 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
18295 op0 = simplify_unary_operation (ABS, mode, op0, mode);
18297 if (mode == SFmode || mode == DFmode)
18299 if (op0 == CONST0_RTX (mode))
18300 op0 = CONST0_RTX (vmode);
18303 rtx v = ix86_build_const_vector (vmode, false, op0);
18305 op0 = force_reg (vmode, v);
18308 else if (op0 != CONST0_RTX (mode))
18309 op0 = force_reg (mode, op0);
18311 mask = ix86_build_signbit_mask (vmode, 0, 0);
18313 if (mode == SFmode)
18314 copysign_insn = gen_copysignsf3_const;
18315 else if (mode == DFmode)
18316 copysign_insn = gen_copysigndf3_const;
18318 copysign_insn = gen_copysigntf3_const;
18320 emit_insn (copysign_insn (dest, op0, op1, mask));
18324 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
18326 nmask = ix86_build_signbit_mask (vmode, 0, 1);
18327 mask = ix86_build_signbit_mask (vmode, 0, 0);
18329 if (mode == SFmode)
18330 copysign_insn = gen_copysignsf3_var;
18331 else if (mode == DFmode)
18332 copysign_insn = gen_copysigndf3_var;
18334 copysign_insn = gen_copysigntf3_var;
18336 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
18340 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18341 be a constant, and so has already been expanded into a vector constant. */
18344 ix86_split_copysign_const (rtx operands[])
18346 enum machine_mode mode, vmode;
18347 rtx dest, op0, mask, x;
18349 dest = operands[0];
18351 mask = operands[3];
18353 mode = GET_MODE (dest);
18354 vmode = GET_MODE (mask);
18356 dest = simplify_gen_subreg (vmode, dest, mode, 0);
18357 x = gen_rtx_AND (vmode, dest, mask);
18358 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18360 if (op0 != CONST0_RTX (vmode))
18362 x = gen_rtx_IOR (vmode, dest, op0);
18363 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18367 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18368 so we have to do two masks. */
18371 ix86_split_copysign_var (rtx operands[])
18373 enum machine_mode mode, vmode;
18374 rtx dest, scratch, op0, op1, mask, nmask, x;
18376 dest = operands[0];
18377 scratch = operands[1];
18380 nmask = operands[4];
18381 mask = operands[5];
18383 mode = GET_MODE (dest);
18384 vmode = GET_MODE (mask);
18386 if (rtx_equal_p (op0, op1))
18388 /* Shouldn't happen often (it's useless, obviously), but when it does
18389 we'd generate incorrect code if we continue below. */
18390 emit_move_insn (dest, op0);
18394 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
18396 gcc_assert (REGNO (op1) == REGNO (scratch));
18398 x = gen_rtx_AND (vmode, scratch, mask);
18399 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
18402 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
18403 x = gen_rtx_NOT (vmode, dest);
18404 x = gen_rtx_AND (vmode, x, op0);
18405 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18409 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
18411 x = gen_rtx_AND (vmode, scratch, mask);
18413 else /* alternative 2,4 */
18415 gcc_assert (REGNO (mask) == REGNO (scratch));
18416 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
18417 x = gen_rtx_AND (vmode, scratch, op1);
18419 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
18421 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
18423 dest = simplify_gen_subreg (vmode, op0, mode, 0);
18424 x = gen_rtx_AND (vmode, dest, nmask);
18426 else /* alternative 3,4 */
18428 gcc_assert (REGNO (nmask) == REGNO (dest));
18430 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
18431 x = gen_rtx_AND (vmode, dest, op0);
18433 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18436 x = gen_rtx_IOR (vmode, dest, scratch);
18437 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
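/* The bit-level identity implemented by both copysign splitters:

     copysign (op0, op1) == (op0 & ~signmask) | (op1 & signmask)

   where signmask holds the sign bit of the mode (e.g.
   0x8000000000000000 for DFmode); NMASK above is ~signmask and MASK
   is signmask.  */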
18440 /* Return TRUE or FALSE depending on whether the first SET in INSN
18441 has source and destination with matching CC modes, and that the
18442 CC mode is at least as constrained as REQ_MODE. */
18445 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
18448 enum machine_mode set_mode;
18450 set = PATTERN (insn);
18451 if (GET_CODE (set) == PARALLEL)
18452 set = XVECEXP (set, 0, 0);
18453 gcc_assert (GET_CODE (set) == SET);
18454 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
18456 set_mode = GET_MODE (SET_DEST (set));
18460 if (req_mode != CCNOmode
18461 && (req_mode != CCmode
18462 || XEXP (SET_SRC (set), 1) != const0_rtx))
18466 if (req_mode == CCGCmode)
18470 if (req_mode == CCGOCmode || req_mode == CCNOmode)
18474 if (req_mode == CCZmode)
18484 if (set_mode != req_mode)
18489 gcc_unreachable ();
18492 return GET_MODE (SET_SRC (set)) == set_mode;
18495 /* Generate insn patterns to do an integer compare of OPERANDS. */
18498 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
18500 enum machine_mode cmpmode;
18503 cmpmode = SELECT_CC_MODE (code, op0, op1);
18504 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
18506 /* This is very simple, but making the interface the same as in the
18507 FP case makes the rest of the code easier. */
18508 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
18509 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
18511 /* Return the test that should be put into the flags user, i.e.
18512 the bcc, scc, or cmov instruction. */
18513 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
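/* For example, expanding an EQ compare of two SImode pseudos emits

     (set (reg:CCZ FLAGS_REG) (compare:CCZ (reg:SI a) (reg:SI b)))

   and returns (eq (reg:CCZ FLAGS_REG) (const_int 0)) for the consumer
   to wrap in a bcc, scc or cmov.  Register names are illustrative.  */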
18516 /* Figure out whether to use ordered or unordered fp comparisons.
18517 Return the appropriate mode to use. */
18520 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
18522 /* ??? In order to make all comparisons reversible, we do all comparisons
18523 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18524 all forms of trapping and nontrapping comparisons, we can make inequality
18525 comparisons trapping again, since it results in better code when using
18526 FCOM based compares. */
18527 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
18531 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
18533 enum machine_mode mode = GET_MODE (op0);
18535 if (SCALAR_FLOAT_MODE_P (mode))
18537 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18538 return ix86_fp_compare_mode (code);
18543 /* Only zero flag is needed. */
18544 case EQ: /* ZF=0 */
18545 case NE: /* ZF!=0 */
18547 /* Codes needing carry flag. */
18548 case GEU: /* CF=0 */
18549 case LTU: /* CF=1 */
18550 /* Detect overflow checks. They need just the carry flag. */
18551 if (GET_CODE (op0) == PLUS
18552 && rtx_equal_p (op1, XEXP (op0, 0)))
18556 case GTU: /* CF=0 & ZF=0 */
18557 case LEU: /* CF=1 | ZF=1 */
18558 /* Detect overflow checks. They need just the carry flag. */
18559 if (GET_CODE (op0) == MINUS
18560 && rtx_equal_p (op1, XEXP (op0, 0)))
18564 /* Codes possibly doable only with sign flag when
18565 comparing against zero. */
18566 case GE: /* SF=OF or SF=0 */
18567 case LT: /* SF<>OF or SF=1 */
18568 if (op1 == const0_rtx)
18571 /* For other cases the carry flag is not required. */
18573 /* Codes doable only with the sign flag when comparing
18574 against zero, but for which we lack a jump instruction,
18575 so we need to use relational tests against the overflow
18576 flag, which thus needs to be zero. */
18577 case GT: /* ZF=0 & SF=OF */
18578 case LE: /* ZF=1 | SF<>OF */
18579 if (op1 == const0_rtx)
18583 /* strcmp patterns do (use flags) and combine may ask us for the proper
18584 mode. */
18588 gcc_unreachable ();
18592 /* Return the fixed registers used for condition codes. */
18595 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
18602 /* If two condition code modes are compatible, return a condition code
18603 mode which is compatible with both. Otherwise, return
18604 VOIDmode. */
18606 static enum machine_mode
18607 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
18612 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
18615 if ((m1 == CCGCmode && m2 == CCGOCmode)
18616 || (m1 == CCGOCmode && m2 == CCGCmode))
18619 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
18621 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
18627 gcc_unreachable ();
18657 /* These are only compatible with themselves, which we already
18658 checked above. */
18664 /* Return a comparison we can do that is equivalent to
18665 swap_condition (code), apart possibly from orderedness.
18666 But never change orderedness if TARGET_IEEE_FP, returning
18667 UNKNOWN in that case if necessary. */
18669 static enum rtx_code
18670 ix86_fp_swap_condition (enum rtx_code code)
18674 case GT: /* GTU - CF=0 & ZF=0 */
18675 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
18676 case GE: /* GEU - CF=0 */
18677 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
18678 case UNLT: /* LTU - CF=1 */
18679 return TARGET_IEEE_FP ? UNKNOWN : GT;
18680 case UNLE: /* LEU - CF=1 | ZF=1 */
18681 return TARGET_IEEE_FP ? UNKNOWN : GE;
18683 return swap_condition (code);
18687 /* Return the cost of comparison CODE using the best strategy for performance.
18688 All following functions use the number of instructions as the cost metric.
18689 In the future this should be tweaked to compute bytes for optimize_size and
18690 take into account the performance of various instructions on various CPUs. */
18693 ix86_fp_comparison_cost (enum rtx_code code)
18697 /* The cost of code using bit-twiddling on %ah. */
18714 arith_cost = TARGET_IEEE_FP ? 5 : 4;
18718 arith_cost = TARGET_IEEE_FP ? 6 : 4;
18721 gcc_unreachable ();
18724 switch (ix86_fp_comparison_strategy (code))
18726 case IX86_FPCMP_COMI:
18727 return arith_cost > 4 ? 3 : 2;
18728 case IX86_FPCMP_SAHF:
18729 return arith_cost > 4 ? 4 : 3;
18735 /* Return the strategy to use for floating-point comparison. We assume
18736 that fcomi is always preferable where available, since that is also true
18737 when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18739 enum ix86_fpcmp_strategy
18740 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
18742 /* Do fcomi/sahf based test when profitable. */
18745 return IX86_FPCMP_COMI;
18747 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
18748 return IX86_FPCMP_SAHF;
18750 return IX86_FPCMP_ARITH;
18753 /* Swap, force into registers, or otherwise massage the two operands
18754 to a fp comparison. The operands are updated in place; the new
18755 comparison code is returned. */
18757 static enum rtx_code
18758 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
18760 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
18761 rtx op0 = *pop0, op1 = *pop1;
18762 enum machine_mode op_mode = GET_MODE (op0);
18763 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
18765 /* All of the unordered compare instructions only work on registers.
18766 The same is true of the fcomi compare instructions. The XFmode
18767 compare instructions require registers except when comparing
18768 against zero or when converting operand 1 from fixed point to
18769 floating point. */
18772 && (fpcmp_mode == CCFPUmode
18773 || (op_mode == XFmode
18774 && ! (standard_80387_constant_p (op0) == 1
18775 || standard_80387_constant_p (op1) == 1)
18776 && GET_CODE (op1) != FLOAT)
18777 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
18779 op0 = force_reg (op_mode, op0);
18780 op1 = force_reg (op_mode, op1);
18784 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18785 things around if they appear profitable, otherwise force op0
18786 into a register. */
18788 if (standard_80387_constant_p (op0) == 0
18790 && ! (standard_80387_constant_p (op1) == 0
18793 enum rtx_code new_code = ix86_fp_swap_condition (code);
18794 if (new_code != UNKNOWN)
18797 tmp = op0, op0 = op1, op1 = tmp;
18803 op0 = force_reg (op_mode, op0);
18805 if (CONSTANT_P (op1))
18807 int tmp = standard_80387_constant_p (op1);
18809 op1 = validize_mem (force_const_mem (op_mode, op1));
18813 op1 = force_reg (op_mode, op1);
18816 op1 = force_reg (op_mode, op1);
18820 /* Try to rearrange the comparison to make it cheaper. */
18821 if (ix86_fp_comparison_cost (code)
18822 > ix86_fp_comparison_cost (swap_condition (code))
18823 && (REG_P (op1) || can_create_pseudo_p ()))
18826 tmp = op0, op0 = op1, op1 = tmp;
18827 code = swap_condition (code);
18829 op0 = force_reg (op_mode, op0);
18837 /* Convert the comparison codes we use to represent FP comparisons to the
18838 integer code that will result in a proper branch. Return UNKNOWN if no
18839 such code is available. */
18842 ix86_fp_compare_code_to_integer (enum rtx_code code)
18871 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18874 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
18876 enum machine_mode fpcmp_mode, intcmp_mode;
18879 fpcmp_mode = ix86_fp_compare_mode (code);
18880 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
18882 /* Do fcomi/sahf based test when profitable. */
18883 switch (ix86_fp_comparison_strategy (code))
18885 case IX86_FPCMP_COMI:
18886 intcmp_mode = fpcmp_mode;
18887 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18888 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18893 case IX86_FPCMP_SAHF:
18894 intcmp_mode = fpcmp_mode;
18895 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18896 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18900 scratch = gen_reg_rtx (HImode);
18901 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
18902 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
18905 case IX86_FPCMP_ARITH:
18906 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18907 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18908 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
18910 scratch = gen_reg_rtx (HImode);
18911 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
18913 /* In the unordered case, we have to check C2 for NaN's, which
18914 doesn't happen to work out to anything nice combination-wise.
18915 So do some bit twiddling on the value we've got in AH to come
18916 up with an appropriate set of condition codes. */
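      /* For reference, once fnstsw's result is viewed through %ah the
	 x87 condition bits sit at C0 = 0x01, C1 = 0x02, C2 = 0x04 and
	 C3 = 0x40.  Masking with 0x45 therefore inspects C0, C2 and C3
	 at once, which after fcom distinguishes <, >, = and unordered
	 (all three set).  */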
18918 intcmp_mode = CCNOmode;
18923 if (code == GT || !TARGET_IEEE_FP)
18925 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18930 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18931 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18932 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
18933 intcmp_mode = CCmode;
18939 if (code == LT && TARGET_IEEE_FP)
18941 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18942 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
18943 intcmp_mode = CCmode;
18948 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
18954 if (code == GE || !TARGET_IEEE_FP)
18956 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
18961 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18962 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
18968 if (code == LE && TARGET_IEEE_FP)
18970 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18971 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18972 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18973 intcmp_mode = CCmode;
18978 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18984 if (code == EQ && TARGET_IEEE_FP)
18986 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18987 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18988 intcmp_mode = CCmode;
18993 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18999 if (code == NE && TARGET_IEEE_FP)
19001 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19002 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19008 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19014 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19018 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19023 gcc_unreachable ();
19031 /* Return the test that should be put into the flags user, i.e.
19032 the bcc, scc, or cmov instruction. */
19033 return gen_rtx_fmt_ee (code, VOIDmode,
19034 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19039 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19043 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19044 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19046 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19048 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19049 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19052 ret = ix86_expand_int_compare (code, op0, op1);
19058 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19060 enum machine_mode mode = GET_MODE (op0);
19072 tmp = ix86_expand_compare (code, op0, op1);
19073 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19074 gen_rtx_LABEL_REF (VOIDmode, label),
19076 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19083 /* Expand DImode branch into multiple compare+branch. */
19085 rtx lo[2], hi[2], label2;
19086 enum rtx_code code1, code2, code3;
19087 enum machine_mode submode;
19089 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19091 tmp = op0, op0 = op1, op1 = tmp;
19092 code = swap_condition (code);
19095 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19096 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19098 submode = mode == DImode ? SImode : DImode;
19100 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19101 avoid two branches. This costs one extra insn, so disable when
19102 optimizing for size. */
19104 if ((code == EQ || code == NE)
19105 && (!optimize_insn_for_size_p ()
19106 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19111 if (hi[1] != const0_rtx)
19112 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19113 NULL_RTX, 0, OPTAB_WIDEN);
19116 if (lo[1] != const0_rtx)
19117 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19118 NULL_RTX, 0, OPTAB_WIDEN);
19120 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19121 NULL_RTX, 0, OPTAB_WIDEN);
19123 ix86_expand_branch (code, tmp, const0_rtx, label);
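/* Worked example of the xor trick on 32-bit: a DImode "a == b"
   reduces to

     ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0

   i.e. two xors, one ior and a single branch instead of a
   compare-and-branch on each half.  */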
19127 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19128 op1 is a constant and the low word is zero, then we can just
19129 examine the high word. Similarly for low word -1 and
19130 less-or-equal-than or greater-than. */
19132 if (CONST_INT_P (hi[1]))
19135 case LT: case LTU: case GE: case GEU:
19136 if (lo[1] == const0_rtx)
19138 ix86_expand_branch (code, hi[0], hi[1], label);
19142 case LE: case LEU: case GT: case GTU:
19143 if (lo[1] == constm1_rtx)
19145 ix86_expand_branch (code, hi[0], hi[1], label);
19153 /* Otherwise, we need two or three jumps. */
19155 label2 = gen_label_rtx ();
19158 code2 = swap_condition (code);
19159 code3 = unsigned_condition (code);
19163 case LT: case GT: case LTU: case GTU:
19166 case LE: code1 = LT; code2 = GT; break;
19167 case GE: code1 = GT; code2 = LT; break;
19168 case LEU: code1 = LTU; code2 = GTU; break;
19169 case GEU: code1 = GTU; code2 = LTU; break;
19171 case EQ: code1 = UNKNOWN; code2 = NE; break;
19172 case NE: code2 = UNKNOWN; break;
19175 gcc_unreachable ();
19180 * if (hi(a) < hi(b)) goto true;
19181 * if (hi(a) > hi(b)) goto false;
19182 * if (lo(a) < lo(b)) goto true;
19186 if (code1 != UNKNOWN)
19187 ix86_expand_branch (code1, hi[0], hi[1], label);
19188 if (code2 != UNKNOWN)
19189 ix86_expand_branch (code2, hi[0], hi[1], label2);
19191 ix86_expand_branch (code3, lo[0], lo[1], label);
19193 if (code2 != UNKNOWN)
19194 emit_label (label2);
19199 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
19204 /* Split branch based on floating point condition. */
19206 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
19207 rtx target1, rtx target2, rtx tmp, rtx pushed)
19212 if (target2 != pc_rtx)
19215 code = reverse_condition_maybe_unordered (code);
19220 condition = ix86_expand_fp_compare (code, op1, op2,
19223 /* Remove pushed operand from stack. */
19225 ix86_free_from_memory (GET_MODE (pushed));
19227 i = emit_jump_insn (gen_rtx_SET
19229 gen_rtx_IF_THEN_ELSE (VOIDmode,
19230 condition, target1, target2)));
19231 if (split_branch_probability >= 0)
19232 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
19236 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
19240 gcc_assert (GET_MODE (dest) == QImode);
19242 ret = ix86_expand_compare (code, op0, op1);
19243 PUT_MODE (ret, QImode);
19244 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
19247 /* Expand a comparison setting or clearing the carry flag. Return true
19248 when successful and set *POP for the operation. */
19250 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
19252 enum machine_mode mode =
19253 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
19255 /* Do not handle double-mode compares, which go through a special path. */
19256 if (mode == (TARGET_64BIT ? TImode : DImode))
19259 if (SCALAR_FLOAT_MODE_P (mode))
19261 rtx compare_op, compare_seq;
19263 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19265 /* Shortcut: the following common codes never translate
19266 into carry flag compares. */
19267 if (code == EQ || code == NE || code == UNEQ || code == LTGT
19268 || code == ORDERED || code == UNORDERED)
19271 /* These comparisons require the zero flag; swap operands so they won't. */
19272 if ((code == GT || code == UNLE || code == LE || code == UNGT)
19273 && !TARGET_IEEE_FP)
19278 code = swap_condition (code);
19281 /* Try to expand the comparison and verify that we end up with
19282 a carry flag based comparison. This fails to be true only when
19283 we decide to expand the comparison using arithmetic, which is
19284 not too common a scenario. */
19286 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19287 compare_seq = get_insns ();
19290 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
19291 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
19292 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
19294 code = GET_CODE (compare_op);
19296 if (code != LTU && code != GEU)
19299 emit_insn (compare_seq);
19304 if (!INTEGRAL_MODE_P (mode))
19313 /* Convert a==0 into (unsigned)a<1. */
19316 if (op1 != const0_rtx)
19319 code = (code == EQ ? LTU : GEU);
19322 /* Convert a>b into b<a or a>=b-1. */
19325 if (CONST_INT_P (op1))
19327 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
19328 /* Bail out on overflow. We could still swap the operands, but that
19329 would force loading of the constant into a register. */
19330 if (op1 == const0_rtx
19331 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
19333 code = (code == GTU ? GEU : LTU);
19340 code = (code == GTU ? LTU : GEU);
19344 /* Convert a>=0 into (unsigned)a<0x80000000. */
19347 if (mode == DImode || op1 != const0_rtx)
19349 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
19350 code = (code == LT ? GEU : LTU);
19354 if (mode == DImode || op1 != constm1_rtx)
19356 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
19357 code = (code == LE ? GEU : LTU);
19363 /* Swapping operands may cause a constant to appear as the first operand. */
19364 if (!nonimmediate_operand (op0, VOIDmode))
19366 if (!can_create_pseudo_p ())
19368 op0 = force_reg (mode, op0);
19370 *pop = ix86_expand_compare (code, op0, op1);
19371 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
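/* For example (illustrative), "a == 0" on unsigned SImode A is
   rewritten above as "(unsigned) a < 1"; a single "cmpl $1, %eax"
   then sets the carry flag exactly when a == 0, and the returned LTU
   rtx can feed sbb/adc-style sequences.  */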
19376 ix86_expand_int_movcc (rtx operands[])
19378 enum rtx_code code = GET_CODE (operands[1]), compare_code;
19379 rtx compare_seq, compare_op;
19380 enum machine_mode mode = GET_MODE (operands[0]);
19381 bool sign_bit_compare_p = false;
19382 rtx op0 = XEXP (operands[1], 0);
19383 rtx op1 = XEXP (operands[1], 1);
19385 if (GET_MODE (op0) == TImode
19386 || (GET_MODE (op0) == DImode
19391 compare_op = ix86_expand_compare (code, op0, op1);
19392 compare_seq = get_insns ();
19395 compare_code = GET_CODE (compare_op);
19397 if ((op1 == const0_rtx && (code == GE || code == LT))
19398 || (op1 == constm1_rtx && (code == GT || code == LE)))
19399 sign_bit_compare_p = true;
19401 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19402 HImode insns, we'd be swallowed in word prefix ops. */
19404 if ((mode != HImode || TARGET_FAST_PREFIX)
19405 && (mode != (TARGET_64BIT ? TImode : DImode))
19406 && CONST_INT_P (operands[2])
19407 && CONST_INT_P (operands[3]))
19409 rtx out = operands[0];
19410 HOST_WIDE_INT ct = INTVAL (operands[2]);
19411 HOST_WIDE_INT cf = INTVAL (operands[3]);
19412 HOST_WIDE_INT diff;
19415 /* Sign bit compares are better done using shifts than we do by using
19416 sbb. */
19417 if (sign_bit_compare_p
19418 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19420 /* Detect overlap between destination and compare sources. */
19423 if (!sign_bit_compare_p)
19426 bool fpcmp = false;
19428 compare_code = GET_CODE (compare_op);
19430 flags = XEXP (compare_op, 0);
19432 if (GET_MODE (flags) == CCFPmode
19433 || GET_MODE (flags) == CCFPUmode)
19437 = ix86_fp_compare_code_to_integer (compare_code);
19440 /* To simplify the rest of the code, restrict to the GEU case. */
19441 if (compare_code == LTU)
19443 HOST_WIDE_INT tmp = ct;
19446 compare_code = reverse_condition (compare_code);
19447 code = reverse_condition (code);
19452 PUT_CODE (compare_op,
19453 reverse_condition_maybe_unordered
19454 (GET_CODE (compare_op)));
19456 PUT_CODE (compare_op,
19457 reverse_condition (GET_CODE (compare_op)));
19461 if (reg_overlap_mentioned_p (out, op0)
19462 || reg_overlap_mentioned_p (out, op1))
19463 tmp = gen_reg_rtx (mode);
19465 if (mode == DImode)
19466 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
19468 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
19469 flags, compare_op));
19473 if (code == GT || code == GE)
19474 code = reverse_condition (code);
19477 HOST_WIDE_INT tmp = ct;
19482 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
19495 tmp = expand_simple_binop (mode, PLUS,
19497 copy_rtx (tmp), 1, OPTAB_DIRECT);
19508 tmp = expand_simple_binop (mode, IOR,
19510 copy_rtx (tmp), 1, OPTAB_DIRECT);
19512 else if (diff == -1 && ct)
19522 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
19524 tmp = expand_simple_binop (mode, PLUS,
19525 copy_rtx (tmp), GEN_INT (cf),
19526 copy_rtx (tmp), 1, OPTAB_DIRECT);
19534 * andl cf - ct, dest
19544 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
19547 tmp = expand_simple_binop (mode, AND,
19549 gen_int_mode (cf - ct, mode),
19550 copy_rtx (tmp), 1, OPTAB_DIRECT);
19552 tmp = expand_simple_binop (mode, PLUS,
19553 copy_rtx (tmp), GEN_INT (ct),
19554 copy_rtx (tmp), 1, OPTAB_DIRECT);
19557 if (!rtx_equal_p (tmp, out))
19558 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
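	  /* Taken together, a sketch of the branch-free sequence this
	     path emits for "dest = cond ? ct : cf" (constants symbolic,
	     registers illustrative):

		 cmpl	%ebx, %eax	; CF from the compare
		 sbbl	%edx, %edx	; %edx = CF ? -1 : 0
		 notl	%edx		; (only if needed)
		 andl	$(cf-ct), %edx	; %edx = mask ? cf-ct : 0
		 addl	$ct, %edx	; %edx = mask ? cf : ct  */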
19565 enum machine_mode cmp_mode = GET_MODE (op0);
19568 tmp = ct, ct = cf, cf = tmp;
19571 if (SCALAR_FLOAT_MODE_P (cmp_mode))
19573 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
	      /* We may be reversing an unordered compare to a normal compare,
		 which is not valid in general (we may convert a non-trapping
		 condition to a trapping one); however, on i386 we currently
		 emit all comparisons unordered.  */
19579 compare_code = reverse_condition_maybe_unordered (compare_code);
19580 code = reverse_condition_maybe_unordered (code);
19584 compare_code = reverse_condition (compare_code);
19585 code = reverse_condition (code);
19589 compare_code = UNKNOWN;
19590 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
19591 && CONST_INT_P (op1))
19593 if (op1 == const0_rtx
19594 && (code == LT || code == GE))
19595 compare_code = code;
19596 else if (op1 == constm1_rtx)
19600 else if (code == GT)
19605 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19606 if (compare_code != UNKNOWN
19607 && GET_MODE (op0) == GET_MODE (out)
19608 && (cf == -1 || ct == -1))
	      /* If the lea code below could be used, only optimize
		 if it results in a 2-insn sequence.  */
19613 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
19614 || diff == 3 || diff == 5 || diff == 9)
19615 || (compare_code == LT && ct == -1)
19616 || (compare_code == GE && cf == -1))
		  /*
		   * notl op1	(if necessary)
		   * sarl $31, op1
		   * orl cf, op1
		   */
		  if (ct != -1)
		    {
		      cf = ct;
		      ct = -1;
		      code = reverse_condition (code);
		    }
19630 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
		  out = expand_simple_binop (mode, IOR,
					     out, GEN_INT (cf),
					     out, 1, OPTAB_DIRECT);
19635 if (out != operands[0])
19636 emit_move_insn (operands[0], out);
19643 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
19644 || diff == 3 || diff == 5 || diff == 9)
19645 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
19647 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
19653 * lea cf(dest*(ct-cf)),dest
19657 * This also catches the degenerate setcc-only case.
19663 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to do the arithmetic in the proper mode to match.  */
19669 tmp = copy_rtx (out);
19673 out1 = copy_rtx (out);
19674 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
19678 tmp = gen_rtx_PLUS (mode, tmp, out1);
19684 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
19687 if (!rtx_equal_p (tmp, out))
19690 out = force_operand (tmp, copy_rtx (out));
19692 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
19694 if (!rtx_equal_p (out, operands[0]))
19695 emit_move_insn (operands[0], copy_rtx (out));
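	  /* A sketch of the lea path above (values illustrative): for
	     "dest = cond ? 5 : 2" we have diff = 3, so

		 setcc	%al			; dest = cond, i.e. 0 or 1
		 leal	2(%eax,%eax,2), %eax	; dest = dest*3 + 2

	     yields 2 or 5 with neither a branch nor a cmov.  */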
	  /*
	   * General case:			Jumpful:
	   *   xorl dest,dest		cmpl op1, op2
	   *   cmpl op1, op2		movl ct, dest
	   *   setcc dest			jcc 1f
	   *   decl dest			movl cf, dest
	   *   andl (cf-ct),dest		1:
	   *   addl ct,dest
	   *
	   * Size 20.			Size 14.
	   *
	   * This is reasonably steep, but branch mispredict costs are
	   * high on modern cpus, so consider failing only if optimizing
	   * for space.
	   */
19716 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19717 && BRANCH_COST (optimize_insn_for_speed_p (),
19722 enum machine_mode cmp_mode = GET_MODE (op0);
19727 if (SCALAR_FLOAT_MODE_P (cmp_mode))
19729 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
19731 /* We may be reversing unordered compare to normal compare,
19732 that is not valid in general (we may convert non-trapping
19733 condition to trapping one), however on i386 we currently
19734 emit all comparisons unordered. */
19735 code = reverse_condition_maybe_unordered (code);
19739 code = reverse_condition (code);
19740 if (compare_code != UNKNOWN)
19741 compare_code = reverse_condition (compare_code);
19745 if (compare_code != UNKNOWN)
	      /* notl op1	(if needed)
		 sarl $31, op1
		 orl cf, op1
		 [addl ct, op1]

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while the code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */
19759 if (compare_code == GE || !cf)
19761 code = reverse_condition (code);
19766 HOST_WIDE_INT tmp = cf;
19771 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
19775 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
19777 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
19779 copy_rtx (out), 1, OPTAB_DIRECT);
19782 out = expand_simple_binop (mode, AND, copy_rtx (out),
19783 gen_int_mode (cf - ct, mode),
19784 copy_rtx (out), 1, OPTAB_DIRECT);
19786 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
19787 copy_rtx (out), 1, OPTAB_DIRECT);
19788 if (!rtx_equal_p (out, operands[0]))
19789 emit_move_insn (operands[0], copy_rtx (out));
19795 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19797 /* Try a few things more with specific constants and a variable. */
19800 rtx var, orig_out, out, tmp;
19802 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19805 /* If one of the two operands is an interesting constant, load a
19806 constant with the above and mask it in with a logical operation. */
19808 if (CONST_INT_P (operands[2]))
19811 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
19812 operands[3] = constm1_rtx, op = and_optab;
19813 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
19814 operands[3] = const0_rtx, op = ior_optab;
19818 else if (CONST_INT_P (operands[3]))
19821 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
19822 operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
19824 operands[2] = const0_rtx, op = ior_optab;
19831 orig_out = operands[0];
19832 tmp = gen_reg_rtx (mode);
19835 /* Recurse to get the constant loaded. */
19836 if (ix86_expand_int_movcc (operands) == 0)
19839 /* Mask in the interesting variable. */
19840 out = expand_binop (mode, op, var, tmp, orig_out, 0,
19842 if (!rtx_equal_p (out, orig_out))
19843 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
      /*
       * For comparison with above,
       *
       * movl cf,dest
       * movl ct,tmp
       * cmpl op1,op2
       * cmovcc tmp,dest
       *
       * Size 15.
       */
19859 if (! nonimmediate_operand (operands[2], mode))
19860 operands[2] = force_reg (mode, operands[2]);
19861 if (! nonimmediate_operand (operands[3], mode))
19862 operands[3] = force_reg (mode, operands[3]);
19864 if (! register_operand (operands[2], VOIDmode)
19866 || ! register_operand (operands[3], VOIDmode)))
19867 operands[2] = force_reg (mode, operands[2]);
19870 && ! register_operand (operands[3], VOIDmode))
19871 operands[3] = force_reg (mode, operands[3]);
19873 emit_insn (compare_seq);
19874 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19875 gen_rtx_IF_THEN_ELSE (mode,
19876 compare_op, operands[2],
19881 /* Swap, force into registers, or otherwise massage the two operands
19882 to an sse comparison with a mask result. Thus we differ a bit from
19883 ix86_prepare_fp_compare_args which expects to produce a flags result.
19885 The DEST operand exists to help determine whether to commute commutative
19886 operators. The POP0/POP1 operands are updated in place. The new
19887 comparison code is returned, or UNKNOWN if not implementable. */
19889 static enum rtx_code
19890 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
19891 rtx *pop0, rtx *pop1)
19899 /* AVX supports all the needed comparisons. */
19902 /* We have no LTGT as an operator. We could implement it with
19903 NE & ORDERED, but this requires an extra temporary. It's
19904 not clear that it's worth it. */
19911 /* These are supported directly. */
19918 /* AVX has 3 operand comparisons, no need to swap anything. */
19921 /* For commutative operators, try to canonicalize the destination
19922 operand to be first in the comparison - this helps reload to
19923 avoid extra moves. */
19924 if (!dest || !rtx_equal_p (dest, *pop1))
19932 /* These are not supported directly before AVX, and furthermore
19933 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19934 comparison operands to transform into something that is
19939 code = swap_condition (code);
19943 gcc_unreachable ();
19949 /* Detect conditional moves that exactly match min/max operational
19950 semantics. Note that this is IEEE safe, as long as we don't
19951 interchange the operands.
19953 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19954 and TRUE if the operation is successful and instructions are emitted. */
static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
19958 rtx cmp_op1, rtx if_true, rtx if_false)
19960 enum machine_mode mode;
19966 else if (code == UNGE)
19969 if_true = if_false;
19975 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
19977 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
19982 mode = GET_MODE (dest);
19984 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19985 but MODE may be a vector mode and thus not appropriate. */
19986 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
19988 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
19991 if_true = force_reg (mode, if_true);
19992 v = gen_rtvec (2, if_true, if_false);
19993 tmp = gen_rtx_UNSPEC (mode, v, u);
19997 code = is_min ? SMIN : SMAX;
19998 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20001 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
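/* For example, "d = a < b ? a : b" with -ffast-math maps straight to SMIN
   and hence to a single minss/minpd-style instruction; without finite-math
   and unsafe-math the IEEE-safe UNSPEC variants above are emitted instead,
   since min/max insns are not symmetric in their NaN handling.  (A sketch;
   the exact insn depends on the mode.)  */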
20005 /* Expand an sse vector comparison. Return the register with the result. */
static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20009 rtx op_true, rtx op_false)
20011 enum machine_mode mode = GET_MODE (dest);
20012 enum machine_mode cmp_mode = GET_MODE (cmp_op0);
20015 cmp_op0 = force_reg (cmp_mode, cmp_op0);
20016 if (!nonimmediate_operand (cmp_op1, cmp_mode))
20017 cmp_op1 = force_reg (cmp_mode, cmp_op1);
20020 || reg_overlap_mentioned_p (dest, op_true)
20021 || reg_overlap_mentioned_p (dest, op_false))
20022 dest = gen_reg_rtx (mode);
20024 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20025 if (cmp_mode != mode)
20027 x = force_reg (cmp_mode, x);
20028 convert_move (dest, x, false);
20031 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20036 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20037 operations. This is used for both scalar and vector conditional moves. */
static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20042 enum machine_mode mode = GET_MODE (dest);
20045 if (vector_all_ones_operand (op_true, mode)
20046 && rtx_equal_p (op_false, CONST0_RTX (mode)))
20048 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20050 else if (op_false == CONST0_RTX (mode))
20052 op_true = force_reg (mode, op_true);
20053 x = gen_rtx_AND (mode, cmp, op_true);
20054 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20056 else if (op_true == CONST0_RTX (mode))
20058 op_false = force_reg (mode, op_false);
20059 x = gen_rtx_NOT (mode, cmp);
20060 x = gen_rtx_AND (mode, x, op_false);
20061 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20063 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
20065 op_false = force_reg (mode, op_false);
20066 x = gen_rtx_IOR (mode, cmp, op_false);
20067 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20069 else if (TARGET_XOP)
20071 op_true = force_reg (mode, op_true);
20073 if (!nonimmediate_operand (op_false, mode))
20074 op_false = force_reg (mode, op_false);
20076 emit_insn (gen_rtx_SET (mode, dest,
20077 gen_rtx_IF_THEN_ELSE (mode, cmp,
20083 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20085 if (!nonimmediate_operand (op_true, mode))
20086 op_true = force_reg (mode, op_true);
20088 op_false = force_reg (mode, op_false);
20094 gen = gen_sse4_1_blendvps;
20098 gen = gen_sse4_1_blendvpd;
20106 gen = gen_sse4_1_pblendvb;
20107 dest = gen_lowpart (V16QImode, dest);
20108 op_false = gen_lowpart (V16QImode, op_false);
20109 op_true = gen_lowpart (V16QImode, op_true);
20110 cmp = gen_lowpart (V16QImode, cmp);
20115 gen = gen_avx_blendvps256;
20119 gen = gen_avx_blendvpd256;
20127 gen = gen_avx2_pblendvb;
20128 dest = gen_lowpart (V32QImode, dest);
20129 op_false = gen_lowpart (V32QImode, op_false);
20130 op_true = gen_lowpart (V32QImode, op_true);
20131 cmp = gen_lowpart (V32QImode, cmp);
20139 emit_insn (gen (dest, op_false, op_true, cmp));
20142 op_true = force_reg (mode, op_true);
20144 t2 = gen_reg_rtx (mode);
20146 t3 = gen_reg_rtx (mode);
20150 x = gen_rtx_AND (mode, op_true, cmp);
20151 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
20153 x = gen_rtx_NOT (mode, cmp);
20154 x = gen_rtx_AND (mode, x, op_false);
20155 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
20157 x = gen_rtx_IOR (mode, t3, t2);
20158 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
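/* The fallback above is the classic SSE blend identity,
   dest = (cmp & op_true) | (~cmp & op_false), e.g. for V4SFmode
   (a sketch; the actual insns depend on the mode):

	movaps	cmp, t2
	andps	op_true, t2	; t2 = cmp & op_true
	movaps	cmp, t3
	andnps	op_false, t3	; t3 = ~cmp & op_false
	orps	t2, t3		; the blended result  */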
20163 /* Expand a floating-point conditional move. Return true if successful. */
bool
ix86_expand_fp_movcc (rtx operands[])
20168 enum machine_mode mode = GET_MODE (operands[0]);
20169 enum rtx_code code = GET_CODE (operands[1]);
20170 rtx tmp, compare_op;
20171 rtx op0 = XEXP (operands[1], 0);
20172 rtx op1 = XEXP (operands[1], 1);
20174 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20176 enum machine_mode cmode;
20178 /* Since we've no cmove for sse registers, don't force bad register
20179 allocation just to gain access to it. Deny movcc when the
20180 comparison mode doesn't match the move mode. */
20181 cmode = GET_MODE (op0);
20182 if (cmode == VOIDmode)
20183 cmode = GET_MODE (op1);
20187 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
20188 if (code == UNKNOWN)
20191 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
20192 operands[2], operands[3]))
20195 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
20196 operands[2], operands[3]);
20197 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
20201 if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
	  && !TARGET_64BIT))
    return false;
20206 /* The floating point conditional move instructions don't directly
20207 support conditions resulting from a signed integer comparison. */
20209 compare_op = ix86_expand_compare (code, op0, op1);
20210 if (!fcmov_comparison_operator (compare_op, VOIDmode))
20212 tmp = gen_reg_rtx (QImode);
20213 ix86_expand_setcc (tmp, code, op0, op1);
20215 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
20218 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20219 gen_rtx_IF_THEN_ELSE (mode, compare_op,
20220 operands[2], operands[3])));
20225 /* Expand a floating-point vector conditional move; a vcond operation
20226 rather than a movcc operation. */
bool
ix86_expand_fp_vcond (rtx operands[])
20231 enum rtx_code code = GET_CODE (operands[3]);
20234 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
20235 &operands[4], &operands[5]);
20236 if (code == UNKNOWN)
20239 switch (GET_CODE (operands[3]))
20242 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
20243 operands[5], operands[0], operands[0]);
20244 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
20245 operands[5], operands[1], operands[2]);
20249 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
20250 operands[5], operands[0], operands[0]);
20251 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
20252 operands[5], operands[1], operands[2]);
20256 gcc_unreachable ();
20258 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
20260 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
20264 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
20265 operands[5], operands[1], operands[2]))
20268 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
20269 operands[1], operands[2]);
20270 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
20274 /* Expand a signed/unsigned integral vector conditional move. */
bool
ix86_expand_int_vcond (rtx operands[])
20279 enum machine_mode data_mode = GET_MODE (operands[0]);
20280 enum machine_mode mode = GET_MODE (operands[4]);
20281 enum rtx_code code = GET_CODE (operands[3]);
20282 bool negate = false;
20285 cop0 = operands[4];
20286 cop1 = operands[5];
20288 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20289 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20290 if ((code == LT || code == GE)
20291 && data_mode == mode
20292 && cop1 == CONST0_RTX (mode)
20293 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
20294 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
20295 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
20296 && (GET_MODE_SIZE (data_mode) == 16
20297 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
20299 rtx negop = operands[2 - (code == LT)];
20300 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
20301 if (negop == CONST1_RTX (data_mode))
20303 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
20304 operands[0], 1, OPTAB_DIRECT);
20305 if (res != operands[0])
20306 emit_move_insn (operands[0], res);
20309 else if (GET_MODE_INNER (data_mode) != DImode
20310 && vector_all_ones_operand (negop, data_mode))
20312 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
20313 operands[0], 0, OPTAB_DIRECT);
20314 if (res != operands[0])
20315 emit_move_insn (operands[0], res);
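      /* Worked example of the shortcut above for V4SImode (illustrative):
	 "x < 0 ? -1 : 0" is just the arithmetic shift psrad $31, and
	 "x < 0 ? 1 : 0" the logical shift psrld $31, so no pcmpgtd or
	 blend is needed at all.  */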
20320 if (!nonimmediate_operand (cop1, mode))
20321 cop1 = force_reg (mode, cop1);
20322 if (!general_operand (operands[1], data_mode))
20323 operands[1] = force_reg (data_mode, operands[1]);
20324 if (!general_operand (operands[2], data_mode))
20325 operands[2] = force_reg (data_mode, operands[2]);
20327 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20329 && (mode == V16QImode || mode == V8HImode
20330 || mode == V4SImode || mode == V2DImode))
20334 /* Canonicalize the comparison to EQ, GT, GTU. */
20345 code = reverse_condition (code);
20351 code = reverse_condition (code);
20357 code = swap_condition (code);
20358 x = cop0, cop0 = cop1, cop1 = x;
20362 gcc_unreachable ();
20365 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20366 if (mode == V2DImode)
20371 /* SSE4.1 supports EQ. */
20372 if (!TARGET_SSE4_1)
20378 /* SSE4.2 supports GT/GTU. */
20379 if (!TARGET_SSE4_2)
20384 gcc_unreachable ();
20388 /* Unsigned parallel compare is not supported by the hardware.
20389 Play some tricks to turn this into a signed comparison
20393 cop0 = force_reg (mode, cop0);
20403 rtx (*gen_sub3) (rtx, rtx, rtx);
20407 case V8SImode: gen_sub3 = gen_subv8si3; break;
20408 case V4DImode: gen_sub3 = gen_subv4di3; break;
20409 case V4SImode: gen_sub3 = gen_subv4si3; break;
20410 case V2DImode: gen_sub3 = gen_subv2di3; break;
20412 gcc_unreachable ();
20414 /* Subtract (-(INT MAX) - 1) from both operands to make
20416 mask = ix86_build_signbit_mask (mode, true, false);
20417 t1 = gen_reg_rtx (mode);
20418 emit_insn (gen_sub3 (t1, cop0, mask));
20420 t2 = gen_reg_rtx (mode);
20421 emit_insn (gen_sub3 (t2, cop1, mask));
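	  /* A sketch of the identity the bias above relies on:

	       (unsigned) x > (unsigned) y
		 <=>  x - 0x80000000 > y - 0x80000000   (signed)

	     i.e. subtracting INT_MIN from both sides (per element) turns
	     the unsigned comparison into one that pcmpgt* can do.  */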
20433 /* Perform a parallel unsigned saturating subtraction. */
20434 x = gen_reg_rtx (mode);
20435 emit_insn (gen_rtx_SET (VOIDmode, x,
20436 gen_rtx_US_MINUS (mode, cop0, cop1)));
20439 cop1 = CONST0_RTX (mode);
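	  /* Sketch of the saturating-subtract trick above (QImode/HImode
	     elements only): since (x -us y) == max (x - y, 0),

	       (unsigned) x <= (unsigned) y  <=>  (x -us y) == 0

	     which reduces the GEU/LEU case to an EQ test against zero.  */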
20445 gcc_unreachable ();
20450 /* Allow the comparison to be done in one mode, but the movcc to
20451 happen in another mode. */
20452 if (data_mode == mode)
20454 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
20455 operands[1+negate], operands[2-negate]);
20459 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
20460 x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
20462 operands[1+negate], operands[2-negate]);
20463 x = gen_lowpart (data_mode, x);
20466 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
20467 operands[2-negate]);
20471 /* Expand a variable vector permutation. */
20474 ix86_expand_vec_perm (rtx operands[])
20476 rtx target = operands[0];
20477 rtx op0 = operands[1];
20478 rtx op1 = operands[2];
20479 rtx mask = operands[3];
20480 rtx t1, t2, t3, t4, vt, vt2, vec[32];
20481 enum machine_mode mode = GET_MODE (op0);
20482 enum machine_mode maskmode = GET_MODE (mask);
20484 bool one_operand_shuffle = rtx_equal_p (op0, op1);
20486 /* Number of elements in the vector. */
20487 w = GET_MODE_NUNITS (mode);
20488 e = GET_MODE_UNIT_SIZE (mode);
20489 gcc_assert (w <= 32);
20493 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
      /* Unfortunately, the VPERMQ and VPERMPD instructions only support
	 a constant shuffle operand.  With a tiny bit of effort we can
	 use VPERMD instead.  A re-interpretation stall for V4DFmode is
	 unfortunate but there's no avoiding it.
	 Similarly for V16HImode we don't have instructions for variable
	 shuffling, while for V32QImode we can, after preparing suitable
	 masks, use vpshufb; vpshufb; vpermq; vpor.  */
20503 if (mode == V16HImode)
20505 maskmode = mode = V32QImode;
20511 maskmode = mode = V8SImode;
20515 t1 = gen_reg_rtx (maskmode);
20517 /* Replicate the low bits of the V4DImode mask into V8SImode:
20519 t1 = { A A B B C C D D }. */
20520 for (i = 0; i < w / 2; ++i)
20521 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
20522 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20523 vt = force_reg (maskmode, vt);
20524 mask = gen_lowpart (maskmode, mask);
20525 if (maskmode == V8SImode)
20526 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
20528 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
      /* Multiply the shuffle indices by two.  */
      t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
				OPTAB_DIRECT);
      /* Add one to the odd shuffle indices:
	 t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
20536 for (i = 0; i < w / 2; ++i)
20538 vec[i * 2] = const0_rtx;
20539 vec[i * 2 + 1] = const1_rtx;
20541 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20542 vt = validize_mem (force_const_mem (maskmode, vt));
      t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
				OPTAB_DIRECT);
20546 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20547 operands[3] = mask = t1;
20548 target = gen_lowpart (mode, target);
20549 op0 = gen_lowpart (mode, op0);
20550 op1 = gen_lowpart (mode, op1);
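      /* Worked example of the widening above (illustrative): a V4DI
	 mask { 3 0 2 1 } is replicated to { 3 3 0 0 2 2 1 1 }, doubled
	 to { 6 6 0 0 4 4 2 2 }, and the odd elements incremented, giving
	 the V8SI control { 6 7 0 1 4 5 2 3 } that moves the same qwords
	 as pairs of dwords.  */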
20556 /* The VPERMD and VPERMPS instructions already properly ignore
20557 the high bits of the shuffle elements. No need for us to
20558 perform an AND ourselves. */
20559 if (one_operand_shuffle)
20560 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
20563 t1 = gen_reg_rtx (V8SImode);
20564 t2 = gen_reg_rtx (V8SImode);
20565 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
20566 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
20572 mask = gen_lowpart (V8SFmode, mask);
20573 if (one_operand_shuffle)
20574 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
20577 t1 = gen_reg_rtx (V8SFmode);
20578 t2 = gen_reg_rtx (V8SFmode);
20579 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
20580 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
20586 /* By combining the two 128-bit input vectors into one 256-bit
20587 input vector, we can use VPERMD and VPERMPS for the full
20588 two-operand shuffle. */
20589 t1 = gen_reg_rtx (V8SImode);
20590 t2 = gen_reg_rtx (V8SImode);
20591 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
20592 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
20593 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
20594 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
20598 t1 = gen_reg_rtx (V8SFmode);
20599 t2 = gen_reg_rtx (V8SImode);
20600 mask = gen_lowpart (V4SImode, mask);
20601 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
20602 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
20603 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
20604 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
20608 t1 = gen_reg_rtx (V32QImode);
20609 t2 = gen_reg_rtx (V32QImode);
20610 t3 = gen_reg_rtx (V32QImode);
20611 vt2 = GEN_INT (128);
20612 for (i = 0; i < 32; i++)
20614 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
20615 vt = force_reg (V32QImode, vt);
20616 for (i = 0; i < 32; i++)
20617 vec[i] = i < 16 ? vt2 : const0_rtx;
20618 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
20619 vt2 = force_reg (V32QImode, vt2);
20620 /* From mask create two adjusted masks, which contain the same
20621 bits as mask in the low 7 bits of each vector element.
20622 The first mask will have the most significant bit clear
20623 if it requests element from the same 128-bit lane
20624 and MSB set if it requests element from the other 128-bit lane.
20625 The second mask will have the opposite values of the MSB,
20626 and additionally will have its 128-bit lanes swapped.
20627 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20628 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20629 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
	 stands for the other 12 bytes.  */
      /* The bit that tells whether an element is from the same lane or the
	 other lane is bit 4, so shift it up by 3 to the MSB position.  */
20633 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
20634 gen_lowpart (V4DImode, mask),
20636 /* Clear MSB bits from the mask just in case it had them set. */
20637 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
20638 /* After this t1 will have MSB set for elements from other lane. */
20639 emit_insn (gen_xorv32qi3 (t1, t1, vt2));
20640 /* Clear bits other than MSB. */
20641 emit_insn (gen_andv32qi3 (t1, t1, vt));
20642 /* Or in the lower bits from mask into t3. */
20643 emit_insn (gen_iorv32qi3 (t3, t1, t2));
20644 /* And invert MSB bits in t1, so MSB is set for elements from the same
20646 emit_insn (gen_xorv32qi3 (t1, t1, vt));
20647 /* Swap 128-bit lanes in t3. */
20648 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20649 gen_lowpart (V4DImode, t3),
20650 const2_rtx, GEN_INT (3),
20651 const0_rtx, const1_rtx));
20652 /* And or in the lower bits from mask into t1. */
20653 emit_insn (gen_iorv32qi3 (t1, t1, t2));
20654 if (one_operand_shuffle)
20656 /* Each of these shuffles will put 0s in places where
20657 element from the other 128-bit lane is needed, otherwise
20658 will shuffle in the requested value. */
20659 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
20660 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
20661 /* For t3 the 128-bit lanes are swapped again. */
20662 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20663 gen_lowpart (V4DImode, t3),
20664 const2_rtx, GEN_INT (3),
20665 const0_rtx, const1_rtx));
20666 /* And oring both together leads to the result. */
20667 emit_insn (gen_iorv32qi3 (target, t1, t3));
20671 t4 = gen_reg_rtx (V32QImode);
	  /* Similar to the above one_operand_shuffle code, just
	     repeated twice, once for each operand.  The merge_two:
	     code below will merge the two results together.  */
20675 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
20676 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
20677 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
20678 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
20679 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
20680 gen_lowpart (V4DImode, t4),
20681 const2_rtx, GEN_INT (3),
20682 const0_rtx, const1_rtx));
20683 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20684 gen_lowpart (V4DImode, t3),
20685 const2_rtx, GEN_INT (3),
20686 const0_rtx, const1_rtx));
20687 emit_insn (gen_iorv32qi3 (t4, t2, t4));
20688 emit_insn (gen_iorv32qi3 (t3, t1, t3));
20694 gcc_assert (GET_MODE_SIZE (mode) <= 16);
20701 /* The XOP VPPERM insn supports three inputs. By ignoring the
20702 one_operand_shuffle special case, we avoid creating another
20703 set of constant vectors in memory. */
20704 one_operand_shuffle = false;
20706 /* mask = mask & {2*w-1, ...} */
20707 vt = GEN_INT (2*w - 1);
20711 /* mask = mask & {w-1, ...} */
20712 vt = GEN_INT (w - 1);
20715 for (i = 0; i < w; i++)
20717 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20718 mask = expand_simple_binop (maskmode, AND, mask, vt,
20719 NULL_RTX, 0, OPTAB_DIRECT);
20721 /* For non-QImode operations, convert the word permutation control
20722 into a byte permutation control. */
20723 if (mode != V16QImode)
20725 mask = expand_simple_binop (maskmode, ASHIFT, mask,
20726 GEN_INT (exact_log2 (e)),
20727 NULL_RTX, 0, OPTAB_DIRECT);
20729 /* Convert mask to vector of chars. */
20730 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
20732 /* Replicate each of the input bytes into byte positions:
20733 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20734 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20735 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20736 for (i = 0; i < 16; ++i)
20737 vec[i] = GEN_INT (i/e * e);
20738 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20739 vt = validize_mem (force_const_mem (V16QImode, vt));
20741 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
20743 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
20745 /* Convert it into the byte positions by doing
20746 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20747 for (i = 0; i < 16; ++i)
20748 vec[i] = GEN_INT (i % e);
20749 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20750 vt = validize_mem (force_const_mem (V16QImode, vt));
20751 emit_insn (gen_addv16qi3 (mask, mask, vt));
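  /* A worked example of the conversion above (illustrative), for V4SI:
     the word control { 2 0 3 1 } is shifted into byte units as
     { 8 0 12 4 }, replicated across each element, and fixed up with
     { 0 1 2 3, ... }, producing the V16QI pshufb control

	 { 8 9 10 11,  0 1 2 3,  12 13 14 15,  4 5 6 7 }

     i.e. each word index expanded to the four byte addresses it covers.  */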
20754 /* The actual shuffle operations all operate on V16QImode. */
20755 op0 = gen_lowpart (V16QImode, op0);
20756 op1 = gen_lowpart (V16QImode, op1);
20757 target = gen_lowpart (V16QImode, target);
20761 emit_insn (gen_xop_pperm (target, op0, op1, mask));
20763 else if (one_operand_shuffle)
20765 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
20772 /* Shuffle the two input vectors independently. */
20773 t1 = gen_reg_rtx (V16QImode);
20774 t2 = gen_reg_rtx (V16QImode);
20775 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
20776 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
20779 /* Then merge them together. The key is whether any given control
20780 element contained a bit set that indicates the second word. */
20781 mask = operands[3];
20783 if (maskmode == V2DImode && !TARGET_SSE4_1)
	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
	     more shuffle to convert the V2DI input mask into a V4SI
	     input mask.  At that point the masking done by
	     ix86_expand_int_vcond will work as desired.  */
20789 rtx t3 = gen_reg_rtx (V4SImode);
20790 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
20791 const0_rtx, const0_rtx,
20792 const2_rtx, const2_rtx));
20794 maskmode = V4SImode;
20798 for (i = 0; i < w; i++)
20800 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20801 vt = force_reg (maskmode, vt);
20802 mask = expand_simple_binop (maskmode, AND, mask, vt,
20803 NULL_RTX, 0, OPTAB_DIRECT);
20805 xops[0] = gen_lowpart (mode, operands[0]);
20806 xops[1] = gen_lowpart (mode, t2);
20807 xops[2] = gen_lowpart (mode, t1);
20808 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
20811 ok = ix86_expand_int_vcond (xops);
20816 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20817 true if we should do zero extension, else sign extension. HIGH_P is
20818 true if we want the N/2 high elements, else the low elements. */
20821 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
20823 enum machine_mode imode = GET_MODE (src);
20828 rtx (*unpack)(rtx, rtx);
20829 rtx (*extract)(rtx, rtx) = NULL;
20830 enum machine_mode halfmode = BLKmode;
20836 unpack = gen_avx2_zero_extendv16qiv16hi2;
20838 unpack = gen_avx2_sign_extendv16qiv16hi2;
20839 halfmode = V16QImode;
20841 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
20845 unpack = gen_avx2_zero_extendv8hiv8si2;
20847 unpack = gen_avx2_sign_extendv8hiv8si2;
20848 halfmode = V8HImode;
20850 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
20854 unpack = gen_avx2_zero_extendv4siv4di2;
20856 unpack = gen_avx2_sign_extendv4siv4di2;
20857 halfmode = V4SImode;
20859 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
20863 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
20865 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
20869 unpack = gen_sse4_1_zero_extendv4hiv4si2;
20871 unpack = gen_sse4_1_sign_extendv4hiv4si2;
20875 unpack = gen_sse4_1_zero_extendv2siv2di2;
20877 unpack = gen_sse4_1_sign_extendv2siv2di2;
20880 gcc_unreachable ();
20883 if (GET_MODE_SIZE (imode) == 32)
20885 tmp = gen_reg_rtx (halfmode);
20886 emit_insn (extract (tmp, src));
20890 /* Shift higher 8 bytes to lower 8 bytes. */
20891 tmp = gen_reg_rtx (imode);
20892 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
20893 gen_lowpart (V1TImode, src),
20899 emit_insn (unpack (dest, tmp));
20903 rtx (*unpack)(rtx, rtx, rtx);
20909 unpack = gen_vec_interleave_highv16qi;
20911 unpack = gen_vec_interleave_lowv16qi;
20915 unpack = gen_vec_interleave_highv8hi;
20917 unpack = gen_vec_interleave_lowv8hi;
20921 unpack = gen_vec_interleave_highv4si;
20923 unpack = gen_vec_interleave_lowv4si;
20926 gcc_unreachable ();
20930 tmp = force_reg (imode, CONST0_RTX (imode));
20932 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
20933 src, pc_rtx, pc_rtx);
20935 emit_insn (unpack (gen_lowpart (imode, dest), src, tmp));
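/* A sketch of the pre-SSE4.1 path above, for V8HI -> V4SI: TMP is either
   zero (zero extension) or the result of "0 > src", i.e. all-ones in
   lanes where SRC is negative, so that e.g.

	punpcklwd  tmp, src	; { s0, ext0, s1, ext1, ... }

   interleaves each element with its extension directly.  (Operand order
   illustrative.)  */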
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
20945 enum rtx_code code = GET_CODE (operands[1]);
20947 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20949 rtx val = const0_rtx;
20950 bool fpcmp = false;
20951 enum machine_mode mode;
20952 rtx op0 = XEXP (operands[1], 0);
20953 rtx op1 = XEXP (operands[1], 1);
20955 if (operands[3] != const1_rtx
20956 && operands[3] != constm1_rtx)
20958 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20960 code = GET_CODE (compare_op);
20962 flags = XEXP (compare_op, 0);
20964 if (GET_MODE (flags) == CCFPmode
20965 || GET_MODE (flags) == CCFPUmode)
20968 code = ix86_fp_compare_code_to_integer (code);
20975 PUT_CODE (compare_op,
20976 reverse_condition_maybe_unordered
20977 (GET_CODE (compare_op)));
20979 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
20982 mode = GET_MODE (operands[0]);
20984 /* Construct either adc or sbb insn. */
20985 if ((code == LTU) == (operands[3] == constm1_rtx))
20990 insn = gen_subqi3_carry;
20993 insn = gen_subhi3_carry;
20996 insn = gen_subsi3_carry;
20999 insn = gen_subdi3_carry;
21002 gcc_unreachable ();
21010 insn = gen_addqi3_carry;
21013 insn = gen_addhi3_carry;
21016 insn = gen_addsi3_carry;
21019 insn = gen_adddi3_carry;
21022 gcc_unreachable ();
21025 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
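/* For example (a sketch, registers illustrative), "r += (a < b)" with an
   unsigned compare becomes

	cmpl	%ebx, %eax	; CF = (a < b)
	adcl	$0, %ecx	; r += CF

   while the decrement flavor uses sbbl $0 instead.  */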
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating-point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally four parts are generated.  */
21037 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
  if (!TARGET_64BIT)
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;
21046 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
21047 gcc_assert (size >= 2 && size <= 4);
21049 /* Optimize constant pool reference to immediates. This is used by fp
21050 moves, that force all constants to memory to allow combining. */
21051 if (MEM_P (operand) && MEM_READONLY_P (operand))
21053 rtx tmp = maybe_get_pool_constant (operand);
21058 if (MEM_P (operand) && !offsettable_memref_p (operand))
      /* The only non-offsettable memories we handle are pushes.  */
21061 int ok = push_operand (operand, VOIDmode);
21065 operand = copy_rtx (operand);
21066 PUT_MODE (operand, word_mode);
21067 parts[0] = parts[1] = parts[2] = parts[3] = operand;
21071 if (GET_CODE (operand) == CONST_VECTOR)
21073 enum machine_mode imode = int_mode_for_mode (mode);
21074 /* Caution: if we looked through a constant pool memory above,
21075 the operand may actually have a different mode now. That's
21076 ok, since we want to pun this all the way back to an integer. */
21077 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
21078 gcc_assert (operand != NULL);
21084 if (mode == DImode)
21085 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
21090 if (REG_P (operand))
21092 gcc_assert (reload_completed);
21093 for (i = 0; i < size; i++)
21094 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
21096 else if (offsettable_memref_p (operand))
21098 operand = adjust_address (operand, SImode, 0);
21099 parts[0] = operand;
21100 for (i = 1; i < size; i++)
21101 parts[i] = adjust_address (operand, SImode, 4 * i);
21103 else if (GET_CODE (operand) == CONST_DOUBLE)
21108 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
21112 real_to_target (l, &r, mode);
21113 parts[3] = gen_int_mode (l[3], SImode);
21114 parts[2] = gen_int_mode (l[2], SImode);
21117 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
21118 long double may not be 80-bit. */
21119 real_to_target (l, &r, mode);
21120 parts[2] = gen_int_mode (l[2], SImode);
21123 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
21126 gcc_unreachable ();
21128 parts[1] = gen_int_mode (l[1], SImode);
21129 parts[0] = gen_int_mode (l[0], SImode);
21132 gcc_unreachable ();
21137 if (mode == TImode)
21138 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
21139 if (mode == XFmode || mode == TFmode)
21141 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
21142 if (REG_P (operand))
21144 gcc_assert (reload_completed);
21145 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
21146 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
21148 else if (offsettable_memref_p (operand))
21150 operand = adjust_address (operand, DImode, 0);
21151 parts[0] = operand;
21152 parts[1] = adjust_address (operand, upper_mode, 8);
21154 else if (GET_CODE (operand) == CONST_DOUBLE)
21159 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
21160 real_to_target (l, &r, mode);
21162 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21163 if (HOST_BITS_PER_WIDE_INT >= 64)
21166 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
21167 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
21170 parts[0] = immed_double_const (l[0], l[1], DImode);
21172 if (upper_mode == SImode)
21173 parts[1] = gen_int_mode (l[2], SImode);
21174 else if (HOST_BITS_PER_WIDE_INT >= 64)
21177 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
21178 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
21181 parts[1] = immed_double_const (l[2], l[3], DImode);
21184 gcc_unreachable ();
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */
21197 ix86_split_long_move (rtx operands[])
21202 int collisions = 0;
21203 enum machine_mode mode = GET_MODE (operands[0]);
21204 bool collisionparts[4];
  /* The DFmode expanders may ask us to move double.
     For a 64-bit target this is a single move.  By hiding the fact
     here we simplify i386.md splitters.  */
21209 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
21211 /* Optimize constant pool reference to immediates. This is used by
21212 fp moves, that force all constants to memory to allow combining. */
21214 if (MEM_P (operands[1])
21215 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
21216 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
21217 operands[1] = get_pool_constant (XEXP (operands[1], 0));
21218 if (push_operand (operands[0], VOIDmode))
21220 operands[0] = copy_rtx (operands[0]);
21221 PUT_MODE (operands[0], word_mode);
21224 operands[0] = gen_lowpart (DImode, operands[0]);
21225 operands[1] = gen_lowpart (DImode, operands[1]);
21226 emit_move_insn (operands[0], operands[1]);
21230 /* The only non-offsettable memory we handle is push. */
21231 if (push_operand (operands[0], VOIDmode))
21234 gcc_assert (!MEM_P (operands[0])
21235 || offsettable_memref_p (operands[0]));
21237 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
21238 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
21240 /* When emitting push, take care for source operands on the stack. */
21241 if (push && MEM_P (operands[1])
21242 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
21244 rtx src_base = XEXP (part[1][nparts - 1], 0);
21246 /* Compensate for the stack decrement by 4. */
21247 if (!TARGET_64BIT && nparts == 3
21248 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
21249 src_base = plus_constant (Pmode, src_base, 4);
21251 /* src_base refers to the stack pointer and is
21252 automatically decreased by emitted push. */
21253 for (i = 0; i < nparts; i++)
21254 part[1][i] = change_address (part[1][i],
21255 GET_MODE (part[1][i]), src_base);
21258 /* We need to do copy in the right order in case an address register
21259 of the source overlaps the destination. */
21260 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
21264 for (i = 0; i < nparts; i++)
21267 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
21268 if (collisionparts[i])
21272 /* Collision in the middle part can be handled by reordering. */
21273 if (collisions == 1 && nparts == 3 && collisionparts [1])
21275 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
21276 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
21278 else if (collisions == 1
21280 && (collisionparts [1] || collisionparts [2]))
21282 if (collisionparts [1])
21284 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
21285 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
21289 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
21290 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
21294 /* If there are more collisions, we can't handle it by reordering.
21295 Do an lea to the last part and use only one colliding move. */
21296 else if (collisions > 1)
21302 base = part[0][nparts - 1];
21304 /* Handle the case when the last part isn't valid for lea.
21305 Happens in 64-bit mode storing the 12-byte XFmode. */
21306 if (GET_MODE (base) != Pmode)
21307 base = gen_rtx_REG (Pmode, REGNO (base));
21309 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
21310 part[1][0] = replace_equiv_address (part[1][0], base);
21311 for (i = 1; i < nparts; i++)
21313 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
21314 part[1][i] = replace_equiv_address (part[1][i], tmp);
21325 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
21326 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
21327 stack_pointer_rtx, GEN_INT (-4)));
21328 emit_move_insn (part[0][2], part[1][2]);
21330 else if (nparts == 4)
21332 emit_move_insn (part[0][3], part[1][3]);
21333 emit_move_insn (part[0][2], part[1][2]);
      /* In 64-bit mode we don't have a 32-bit push available.  In case this
	 is a register, it is OK - we will just use the larger counterpart.
	 We also retype memory - these come from an attempt to avoid the REX
	 prefix on moving the second half of a TFmode value.  */
21342 if (GET_MODE (part[1][1]) == SImode)
21344 switch (GET_CODE (part[1][1]))
21347 part[1][1] = adjust_address (part[1][1], DImode, 0);
21351 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
21355 gcc_unreachable ();
21358 if (GET_MODE (part[1][0]) == SImode)
21359 part[1][0] = part[1][1];
21362 emit_move_insn (part[0][1], part[1][1]);
21363 emit_move_insn (part[0][0], part[1][0]);
  /* Choose the correct order so as not to overwrite the source before
     it is copied.  */
21368 if ((REG_P (part[0][0])
21369 && REG_P (part[1][1])
21370 && (REGNO (part[0][0]) == REGNO (part[1][1])
21372 && REGNO (part[0][0]) == REGNO (part[1][2]))
21374 && REGNO (part[0][0]) == REGNO (part[1][3]))))
21376 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
21378 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
21380 operands[2 + i] = part[0][j];
21381 operands[6 + i] = part[1][j];
21386 for (i = 0; i < nparts; i++)
21388 operands[2 + i] = part[0][i];
21389 operands[6 + i] = part[1][i];
21393 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21394 if (optimize_insn_for_size_p ())
21396 for (j = 0; j < nparts - 1; j++)
21397 if (CONST_INT_P (operands[6 + j])
21398 && operands[6 + j] != const0_rtx
21399 && REG_P (operands[2 + j]))
21400 for (i = j; i < nparts - 1; i++)
21401 if (CONST_INT_P (operands[7 + i])
21402 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
21403 operands[7 + i] = operands[2 + j];
21406 for (i = 0; i < nparts; i++)
21407 emit_move_insn (operands[2 + i], operands[6 + i]);
21412 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21413 left shift by a constant, either using a single shift or
21414 a sequence of add instructions. */
21417 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
21419 rtx (*insn)(rtx, rtx, rtx);
21422 || (count * ix86_cost->add <= ix86_cost->shift_const
21423 && !optimize_insn_for_size_p ()))
21425 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
21426 while (count-- > 0)
21427 emit_insn (insn (operand, operand, operand));
21431 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
21432 emit_insn (insn (operand, operand, GEN_INT (count)));
21437 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
21439 rtx (*gen_ashl3)(rtx, rtx, rtx);
21440 rtx (*gen_shld)(rtx, rtx, rtx);
21441 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21443 rtx low[2], high[2];
21446 if (CONST_INT_P (operands[2]))
21448 split_double_mode (mode, operands, 2, low, high);
21449 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21451 if (count >= half_width)
21453 emit_move_insn (high[0], low[1]);
21454 emit_move_insn (low[0], const0_rtx);
21456 if (count > half_width)
21457 ix86_expand_ashl_const (high[0], count - half_width, mode);
21461 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
21463 if (!rtx_equal_p (operands[0], operands[1]))
21464 emit_move_insn (operands[0], operands[1]);
21466 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
21467 ix86_expand_ashl_const (low[0], count, mode);
21472 split_double_mode (mode, operands, 1, low, high);
21474 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
21476 if (operands[1] == const1_rtx)
      /* Assuming we've chosen QImode-capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
21480 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
21482 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
21484 ix86_expand_clear (low[0]);
21485 ix86_expand_clear (high[0]);
21486 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
21488 d = gen_lowpart (QImode, low[0]);
21489 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
21490 s = gen_rtx_EQ (QImode, flags, const0_rtx);
21491 emit_insn (gen_rtx_SET (VOIDmode, d, s));
21493 d = gen_lowpart (QImode, high[0]);
21494 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
21495 s = gen_rtx_NE (QImode, flags, const0_rtx);
21496 emit_insn (gen_rtx_SET (VOIDmode, d, s));
21499 /* Otherwise, we can get the same results by manually performing
21500 a bit extract operation on bit 5/6, and then performing the two
21501 shifts. The two methods of getting 0/1 into low/high are exactly
21502 the same size. Avoiding the shift in the bit extract case helps
21503 pentium4 a bit; no one else seems to care much either way. */
21506 enum machine_mode half_mode;
21507 rtx (*gen_lshr3)(rtx, rtx, rtx);
21508 rtx (*gen_and3)(rtx, rtx, rtx);
21509 rtx (*gen_xor3)(rtx, rtx, rtx);
21510 HOST_WIDE_INT bits;
21513 if (mode == DImode)
21515 half_mode = SImode;
21516 gen_lshr3 = gen_lshrsi3;
21517 gen_and3 = gen_andsi3;
21518 gen_xor3 = gen_xorsi3;
21523 half_mode = DImode;
21524 gen_lshr3 = gen_lshrdi3;
21525 gen_and3 = gen_anddi3;
21526 gen_xor3 = gen_xordi3;
21530 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
21531 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
21533 x = gen_lowpart (half_mode, operands[2]);
21534 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
21536 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
21537 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
21538 emit_move_insn (low[0], high[0]);
21539 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
21542 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
21543 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
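	  /* A sketch of the sequence built above for DImode "1 << n" on a
	     32-bit target (registers illustrative):

		 xorl	%eax, %eax
		 xorl	%edx, %edx
		 testb	$32, %cl
		 sete	%al		; low  = ((n & 32) == 0)
		 setne	%dl		; high = ((n & 32) != 0)
		 shll	%cl, %eax	; 32-bit shifts use only cl & 31,
		 shll	%cl, %edx	; so the set bit lands in the right half.  */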
21547 if (operands[1] == constm1_rtx)
21549 /* For -1 << N, we can avoid the shld instruction, because we
21550 know that we're shifting 0...31/63 ones into a -1. */
21551 emit_move_insn (low[0], constm1_rtx);
21552 if (optimize_insn_for_size_p ())
21553 emit_move_insn (high[0], low[0]);
21555 emit_move_insn (high[0], constm1_rtx);
21559 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
21561 if (!rtx_equal_p (operands[0], operands[1]))
21562 emit_move_insn (operands[0], operands[1]);
21564 split_double_mode (mode, operands, 1, low, high);
21565 emit_insn (gen_shld (high[0], low[0], operands[2]));
21568 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
21570 if (TARGET_CMOVE && scratch)
21572 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21573 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21575 ix86_expand_clear (scratch);
21576 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
21580 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
21581 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
21583 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
21588 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
21590 rtx (*gen_ashr3)(rtx, rtx, rtx)
21591 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
21592 rtx (*gen_shrd)(rtx, rtx, rtx);
21593 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21595 rtx low[2], high[2];
21598 if (CONST_INT_P (operands[2]))
21600 split_double_mode (mode, operands, 2, low, high);
21601 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21603 if (count == GET_MODE_BITSIZE (mode) - 1)
21605 emit_move_insn (high[0], high[1]);
21606 emit_insn (gen_ashr3 (high[0], high[0],
21607 GEN_INT (half_width - 1)));
21608 emit_move_insn (low[0], high[0]);
21611 else if (count >= half_width)
21613 emit_move_insn (low[0], high[1]);
21614 emit_move_insn (high[0], low[0]);
21615 emit_insn (gen_ashr3 (high[0], high[0],
21616 GEN_INT (half_width - 1)));
21618 if (count > half_width)
21619 emit_insn (gen_ashr3 (low[0], low[0],
21620 GEN_INT (count - half_width)));
21624 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21626 if (!rtx_equal_p (operands[0], operands[1]))
21627 emit_move_insn (operands[0], operands[1]);
21629 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21630 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
21635 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21637 if (!rtx_equal_p (operands[0], operands[1]))
21638 emit_move_insn (operands[0], operands[1]);
21640 split_double_mode (mode, operands, 1, low, high);
21642 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21643 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
21645 if (TARGET_CMOVE && scratch)
21647 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21648 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21650 emit_move_insn (scratch, high[0]);
21651 emit_insn (gen_ashr3 (scratch, scratch,
21652 GEN_INT (half_width - 1)));
21653 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21658 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
21659 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
21661 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
21667 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
21669 rtx (*gen_lshr3)(rtx, rtx, rtx)
21670 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
21671 rtx (*gen_shrd)(rtx, rtx, rtx);
21672 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21674 rtx low[2], high[2];
21677 if (CONST_INT_P (operands[2]))
21679 split_double_mode (mode, operands, 2, low, high);
21680 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21682 if (count >= half_width)
21684 emit_move_insn (low[0], high[1]);
21685 ix86_expand_clear (high[0]);
21687 if (count > half_width)
21688 emit_insn (gen_lshr3 (low[0], low[0],
21689 GEN_INT (count - half_width)));
21693 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21695 if (!rtx_equal_p (operands[0], operands[1]))
21696 emit_move_insn (operands[0], operands[1]);
21698 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21699 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
21704 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21706 if (!rtx_equal_p (operands[0], operands[1]))
21707 emit_move_insn (operands[0], operands[1]);
21709 split_double_mode (mode, operands, 1, low, high);
21711 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21712 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
21714 if (TARGET_CMOVE && scratch)
21716 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21717 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21719 ix86_expand_clear (scratch);
21720 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21725 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
21726 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
21728 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
/* Predict the just-emitted jump instruction to be taken with probability PROB.  */
21735 predict_jump (int prob)
21737 rtx insn = get_last_insn ();
21738 gcc_assert (JUMP_P (insn));
21739 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
/* Helper function for the string operations below.  Checks whether
   VARIABLE is aligned to VALUE bytes; if it is, jumps to the label.  */
21745 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
21747 rtx label = gen_label_rtx ();
21748 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
21749 if (GET_MODE (variable) == DImode)
21750 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
21752 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
21753 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
21756 predict_jump (REG_BR_PROB_BASE * 50 / 100);
21758 predict_jump (REG_BR_PROB_BASE * 90 / 100);
/* Adjust COUNTREG by VALUE.  */
21764 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
21766 rtx (*gen_add)(rtx, rtx, rtx)
21767 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
21769 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
/* Zero-extend the possibly SImode EXP to a Pmode register.  */
21774 ix86_zero_extend_to_Pmode (rtx exp)
21776 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
21779 /* Divide COUNTREG by SCALE. */
21781 scale_counter (rtx countreg, int scale)
21787 if (CONST_INT_P (countreg))
21788 return GEN_INT (INTVAL (countreg) / scale);
21789 gcc_assert (REG_P (countreg));
21791 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
21792 GEN_INT (exact_log2 (scale)),
21793 NULL, 1, OPTAB_DIRECT);
21797 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21798 DImode for constant loop counts. */
21800 static enum machine_mode
21801 counter_mode (rtx count_exp)
21803 if (GET_MODE (count_exp) != VOIDmode)
21804 return GET_MODE (count_exp);
21805 if (!CONST_INT_P (count_exp))
21807 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
/* When SRCPTR is non-NULL, output a simple loop to move memory pointed
   to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
   the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory by VALUE (supposed to be
   in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr, rtx value,
                               rtx count, enum machine_mode mode, int unroll,
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
                              NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
  emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
  predict_jump (REG_BR_PROB_BASE * 10 / 100);

  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
         we can save registers by using a single temporary.
         Also, using 4 temporaries is overkill in 32-bit mode.  */
      if (!TARGET_64BIT && 0)
          for (i = 0; i < unroll; i++)
                adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
                adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
              emit_move_insn (destmem, srcmem);
          gcc_assert (unroll <= 4);
          for (i = 0; i < unroll; i++)
              tmpreg[i] = gen_reg_rtx (mode);
                adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
              emit_move_insn (tmpreg[i], srcmem);
          for (i = 0; i < unroll; i++)
                adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
              emit_move_insn (destmem, tmpreg[i]);
      for (i = 0; i < unroll; i++)
            adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
          emit_move_insn (destmem, value);

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
                             true, OPTAB_LIB_WIDEN);
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
  if (expected_size != -1)
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
      else if (expected_size > REG_BR_PROB_BASE)
        predict_jump (REG_BR_PROB_BASE - 1);
        predict_jump (REG_BR_PROB_BASE
                      - (REG_BR_PROB_BASE + expected_size / 2)
                        / expected_size);
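        /* Rounded division computing roughly 1 - 1/expected_size: e.g.
           with REG_BR_PROB_BASE == 10000 and EXPECTED_SIZE == 4 expected
           iterations, 10000 - (10000 + 2) / 4 == 7500, i.e. the backward
           branch is predicted taken 75% of the time.  */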
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
                                 true, OPTAB_LIB_WIDEN);
        emit_move_insn (srcptr, tmp);
  emit_label (out_label);

/* Output "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
                           rtx destptr, rtx srcptr,
                           enum machine_mode mode)
  HOST_WIDE_INT rounded_count;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                       GET_MODE_SIZE (mode)));
  if (mode != QImode)
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
                               GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
  if (CONST_INT_P (count))
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, rounded_count);
      set_mem_size (srcmem, rounded_count);
      if (MEM_SIZE_KNOWN_P (destmem))
        clear_mem_size (destmem);
      if (MEM_SIZE_KNOWN_P (srcmem))
        clear_mem_size (srcmem);
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,

/* Output "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
                            rtx count, enum machine_mode mode,
  HOST_WIDE_INT rounded_count;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                       GET_MODE_SIZE (mode)));
  if (mode != QImode)
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));

emit_strmov (rtx destmem, rtx srcmem,
             rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));

/* Output code to copy at most count & (max_size - 1) bytes from SRC to
   DEST.  */
expand_movmem_epilogue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count, int max_size)
  if (CONST_INT_P (count))
      HOST_WIDE_INT countval = INTVAL (count);

      if ((countval & 0x10) && max_size > 16)
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode,
                           offset + 8);
            gcc_unreachable ();
      if ((countval & 0x08) && max_size > 8)
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode,
                           offset + 4);
      if ((countval & 0x04) && max_size > 4)
          emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
      if ((countval & 0x02) && max_size > 2)
          emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
      if ((countval & 0x01) && max_size > 1)
          emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
      count = expand_simple_binop (GET_MODE (count), AND, count,
                                   GEN_INT (max_size - 1),
                                   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
                                     count, QImode, 1, 4);

  /* When there are stringops, we can cheaply increase dest and src
     pointers.  Otherwise we save code size by maintaining offset (zero is
     readily available from the preceding rep operation) and using x86
     addressing modes.  */
  if (TARGET_SINGLE_STRINGOP)
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
          rtx label = ix86_expand_aligntest (count, 2, true);
          src = change_address (srcmem, HImode, srcptr);
          dest = change_address (destmem, HImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
          rtx label = ix86_expand_aligntest (count, 1, true);
          src = change_address (srcmem, QImode, srcptr);
          dest = change_address (destmem, QImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
      rtx offset = force_reg (Pmode, const0_rtx);
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
                                     true, OPTAB_LIB_WIDEN);
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
          rtx label = ix86_expand_aligntest (count, 2, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, HImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, HImode, tmp);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
                                     true, OPTAB_LIB_WIDEN);
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
          rtx label = ix86_expand_aligntest (count, 1, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, QImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, QImode, tmp);
          emit_move_insn (dest, src);
          emit_label (label);
          LABEL_NUSES (label) = 1;

/* Output code to set at most count & (max_size - 1) bytes starting at
   DEST.  */
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
                                 rtx count, int max_size)
    expand_simple_binop (counter_mode (count), AND, count,
                         GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
                                 gen_lowpart (QImode, value), count, QImode,

/* Output code to set at most count & (max_size - 1) bytes starting at
   DEST.  */
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int max_size)
  if (CONST_INT_P (count))
      HOST_WIDE_INT countval = INTVAL (count);

      if ((countval & 0x10) && max_size > 16)
              dest = adjust_automodify_address_nv (destmem, DImode, destptr,
                                                   offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, DImode, destptr,
                                                   offset + 8);
              emit_insn (gen_strset (destptr, dest, value));
            gcc_unreachable ();
      if ((countval & 0x08) && max_size > 8)
              dest = adjust_automodify_address_nv (destmem, DImode, destptr,
                                                   offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, SImode, destptr,
                                                   offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, SImode, destptr,
                                                   offset + 4);
              emit_insn (gen_strset (destptr, dest, value));
      if ((countval & 0x04) && max_size > 4)
          dest = adjust_automodify_address_nv (destmem, SImode, destptr,
                                               offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      if ((countval & 0x02) && max_size > 2)
          dest = adjust_automodify_address_nv (destmem, HImode, destptr,
                                               offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      if ((countval & 0x01) && max_size > 1)
          dest = adjust_automodify_address_nv (destmem, QImode, destptr,
                                               offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count,
                                       max_size);
      rtx label = ix86_expand_aligntest (count, 16, true);
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
      emit_label (label);
      LABEL_NUSES (label) = 1;
      rtx label = ix86_expand_aligntest (count, 8, true);
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
      emit_label (label);
      LABEL_NUSES (label) = 1;
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;

/* Copy enough from SRC to DEST to align DEST, known to be aligned by
   ALIGN, to DESIRED_ALIGNMENT.  */
expand_movmem_prologue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count,
                        int align, int desired_alignment)
  if (align <= 1 && desired_alignment > 1)
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
  if (align <= 2 && desired_alignment > 2)
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
  if (align <= 4 && desired_alignment > 4)
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
  gcc_assert (desired_alignment <= 8);

/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
                                 int desired_align, int align_bytes)
  rtx orig_dst = dst;
  rtx orig_src = src;
  int src_align_bytes = get_mem_align_offset (src,
                                              desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
  if (align_bytes & 2)
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
          && (src_align_bytes & 1) == (align_bytes & 1)
          && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
        set_mem_align (src, 2 * BITS_PER_UNIT);
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
  if (align_bytes & 4)
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
          unsigned int src_align = 0;
          if ((src_align_bytes & 3) == (align_bytes & 3))
          else if ((src_align_bytes & 1) == (align_bytes & 1))
          if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
            set_mem_align (src, src_align * BITS_PER_UNIT);
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
      else if ((src_align_bytes & 3) == (align_bytes & 3))
      else if ((src_align_bytes & 1) == (align_bytes & 1))
      if (src_align > (unsigned int) desired_align)
        src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
        set_mem_align (src, src_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);

/* Set enough of DEST to align DEST, known to be aligned by ALIGN, to
   DESIRED_ALIGNMENT.  */
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int align, int desired_alignment)
  if (align <= 1 && desired_alignment > 1)
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
  if (align <= 2 && desired_alignment > 2)
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
  if (align <= 4 && desired_alignment > 4)
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
  gcc_assert (desired_alignment <= 8);

/* Set enough of DST to align DST, known to be aligned by ALIGN, to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
                                 int desired_align, int align_bytes)
  rtx orig_dst = dst;
  if (align_bytes & 1)
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (QImode, value)));
  if (align_bytes & 2)
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (HImode, value)));
  if (align_bytes & 4)
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (SImode, value)));
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);

/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
            int *dynamic_check, bool *noalign)
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
                             ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable                    \
                           || (alg != rep_prefix_1_byte         \
                               && alg != rep_prefix_4_byte      \
                               && alg != rep_prefix_8_byte))
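/* E.g. compiling with -ffixed-ecx or -ffixed-edi sets the corresponding
   fixed_regs[] entry, making rep_prefix_usable false above; ALG_USABLE_P
   then rejects all three rep_prefix_* algorithms, leaving only the loop
   and libcall variants eligible.  */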
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
          && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
    algs = &cost->memset[TARGET_64BIT != 0];
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
      if (!count || (count & 3))
        return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
        return rep_prefix_usable ? rep_prefix_4_byte : loop;
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
          /* We get here if the algorithms that were not libcall-based
             were rep-prefix based and we are unable to use rep prefixes
             based on global register usage.  Break out of the loop and
             use the heuristic below.  */
          if (algs->size[i].max == 0)
          if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
              enum stringop_alg candidate = algs->size[i].alg;

              if (candidate != libcall && ALG_USABLE_P (candidate))
              /* Honor TARGET_INLINE_ALL_STRINGOPS by picking the
                 last non-libcall inline algorithm.  */
              if (TARGET_INLINE_ALL_STRINGOPS)
                  /* When the current size is best copied by a libcall,
                     but we are still forced to inline, run the heuristic
                     below that picks code for medium-sized blocks.  */
                  if (alg != libcall)
              else if (ALG_USABLE_P (candidate))
                  *noalign = algs->size[i].noalign;
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);

  /* When asked to inline the call anyway, try to pick a meaningful
     choice.  We look for the maximal size of a block that is faster to
     copy by hand and take blocks of at most that size, guessing that the
     average size will be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall
          || !ALG_USABLE_P (algs->unknown_size)))
      enum stringop_alg alg;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
          enum stringop_alg candidate = algs->size[i].alg;
          any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

          if (candidate != libcall && candidate
              && ALG_USABLE_P (candidate))
            max = algs->size[i].max;
      /* If there aren't any usable algorithms, then recursing on
         smaller sizes isn't going to find anything.  Just return the
         simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
          /* Pick something reasonable.  */
          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
            *dynamic_check = 128;
          return loop_1_byte;
      alg = decide_alg (count, max / 2, memset, dynamic_check, noalign);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P

/* Decide on alignment.  We know that the operand is already aligned to
   ALIGN (ALIGN can be based on profile feedback and thus it is not 100%
   guaranteed).  */
decide_alignment (int align,
                  enum stringop_alg alg,
  int desired_align = 0;
      gcc_unreachable ();
    case unrolled_loop:
      desired_align = GET_MODE_SIZE (Pmode);
    case rep_prefix_8_byte:
    case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8-byte-aligned
         blocks, copying a whole cache line at once.  */
      if (TARGET_PENTIUMPRO)
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8-byte-aligned
         blocks, copying a whole cache line at once.  */
      if (TARGET_PENTIUMPRO)
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;

/* Return the smallest power of 2 greater than VAL.  */
smallest_pow2_greater_than (int val)
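/* The body is elided in this extract; a minimal sketch consistent with
   the comment above (assuming VAL >= 0) would be:

     int ret = 1;
     while (ret <= val)
       ret <<= 1;
     return ret;

   e.g. smallest_pow2_greater_than (4) == 8 and
   smallest_pow2_greater_than (7) == 8.  */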
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: a conditional that jumps up to the epilogues for
      small blocks that can be handled by the epilogue alone.  This is
      faster, but also needed for correctness, since the prologue assumes
      the block is larger than the desired alignment.

      An optional dynamic check for size and a libcall for large blocks
      is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power-of-two sized blocks,
      or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with the specified algorithm.

   4) Epilogue: code copying the tail of the block that is too small to
      be handled by the main body (or up to the size guarded by the
      prologue guard).  */
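/* Schematically (a sketch, not the literal expansion), for a
   non-constant byte count N the generated code behaves like:

     if (n < epilogue_size_needed)                  <- 1) prologue guard
       goto epilogue;
     while (dst & (desired_align - 1))              <- 2) alignment prologue
       { *dst++ = *src++; n--; }
     main_body (dst, src, n);                       <- 3) chosen algorithm
   epilogue:
     copy_tail (dst, src, n & (epilogue_size_needed - 1));   <- 4) epilogue

   where EPILOGUE_SIZE_NEEDED and DESIRED_ALIGN are computed in step 0
   below; main_body and copy_tail are hypothetical stand-ins, not real
   helpers.  */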
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care
     here just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check, &noalign);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS || noalign)
    align = desired_align;

  if (alg == libcall)
  gcc_assert (alg != no_stringop);
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  srcreg = copy_addr_to_reg (XEXP (src, 0));

      gcc_unreachable ();
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * (TARGET_64BIT ? 4 : 2);
    case rep_prefix_8_byte:
    case rep_prefix_4_byte:
    case rep_prefix_1_byte:
      need_zero_guard = true;

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
          = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = desired_align - align_bytes;
      if (align_bytes == 0)
        count_exp = force_reg (counter_mode (count_exp), count_exp);
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
         bytes.  Make sure it is a power of 2.  */
      epilogue_size_needed
        = smallest_pow2_greater_than (epilogue_size_needed);

      if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
          /* If the main algorithm works on QImode, no epilogue is needed.
             For small sizes just don't align anything.  */
          if (size_needed == 1)
            desired_align = align;
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp),
                                   1, label);
          if (expected_size == -1 || expected_size < epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
            predict_jump (REG_BR_PROB_BASE * 20 / 100);

  /* Emit code to decide at runtime whether a library call or inline code
     should be used.  */
  if (dynamic_check != -1)
      if (CONST_INT_P (count_exp))
          if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT) dynamic_check)
              emit_block_move_via_libcall (dst, src, count_exp, false);
              count_exp = const0_rtx;
          rtx hot_label = gen_label_rtx ();
          jump_around_label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                                   LEU, 0, GET_MODE (count_exp), 1,
                                   hot_label);
          predict_jump (REG_BR_PROB_BASE * 90 / 100);
          emit_block_move_via_libcall (dst, src, count_exp, false);
          emit_jump (jump_around_label);
          emit_label (hot_label);

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
      if (align_bytes == 0)
          /* Except for the first move in the epilogue, we no longer know
             the constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          src = change_address (src, BLKmode, srcreg);
          dst = change_address (dst, BLKmode, destreg);
          expand_movmem_prologue (dst, src, destreg, srcreg, count_exp,
                                  align,
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (counter_mode (count_exp),
                                     count_exp, -align_bytes);
          count -= align_bytes;
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
          /* It is possible that we copied enough so the main loop will
             not execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp),
                                   1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
  if (label && size_needed == 1)
      emit_label (label);
      LABEL_NUSES (label) = 1;
      epilogue_size_needed = 1;
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

      gcc_unreachable ();
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, QImode, 1, expected_size);
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, word_mode, 1, expected_size);
    case unrolled_loop:
      /* Unroll only by a factor of 2 in 32-bit mode, since we don't have
         enough registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, word_mode,
                                     TARGET_64BIT ? 4 : 2,
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
  /* Properly adjust the offsets of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
                                          (count / size_needed)
                                          * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                          (count / size_needed)
                                          * size_needed);
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */

  /* When the main loop is done, COUNT_EXP might hold the original count,
     while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
     Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
     bytes.  Compensate if needed.  */
      if (size_needed < epilogue_size_needed)
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
      emit_label (label);
      LABEL_NUSES (label) = 1;
  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
                            epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);

/* Helper function for memset.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than synth_mult
   by unwinding the sequence by hand on CPUs with slow multiply.  */
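/* For example, duplicating 0xAB into SImode by the shift-and-or
   sequence emitted below:

       0x000000AB
     | 0x0000AB00      (reg << 8)
     = 0x0000ABAB
     | 0xABAB0000      (reg << 16)
     = 0xABABABAB

   which equals 0xAB * 0x01010101; DImode needs one more shift by 32
   and or.  */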
promote_duplicated_reg (enum machine_mode mode, rtx val)
  enum machine_mode valmode = GET_MODE (val);
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
      HOST_WIDE_INT v = INTVAL (val) & 255;

      if (mode == DImode)
        v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));

  if (valmode == VOIDmode)
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
  if (!TARGET_PARTIAL_REG_STALL)
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
         + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
        if (mode == SImode)
          emit_insn (gen_movsi_insv_1 (reg, reg));
          emit_insn (gen_movdi_insv_1 (reg, reg));
          tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
                                     NULL, 1, OPTAB_DIRECT);
            expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
          tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
                                     NULL, 1, OPTAB_DIRECT);
          reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1,
                                     OPTAB_DIRECT);

/* Duplicate value VAL using promote_duplicated_reg into the maximal size
   that will be needed by the main loop copying SIZE_NEEDED chunks and by
   the prologue getting the alignment from ALIGN to DESIRED_ALIGN.  */
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
                                int align)
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
    promoted_val = val;

  return promoted_val;

/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See the expand_movmem comment for an explanation of the
   individual steps performed.  */
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check, &noalign);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS || noalign)
    align = desired_align;

  if (alg == libcall)
  gcc_assert (alg != no_stringop);
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));

      gcc_unreachable ();
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * 4;
    case rep_prefix_8_byte:
    case rep_prefix_4_byte:
    case rep_prefix_1_byte:
      need_zero_guard = true;

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
          = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = desired_align - align_bytes;
      if (align_bytes == 0)
          enum machine_mode mode = SImode;
          if (TARGET_64BIT && (count & ~0xffffffff))
          count_exp = force_reg (mode, count_exp);
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
         bytes.  Make sure it is a power of 2.  */
      epilogue_size_needed
        = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the
         VAL-promoting code.  This means that if the promoted VAL is not
         constant, we might not use it in the epilogue and have to use a
         byte loop instead.  */
      if (epilogue_size_needed > 2 && !promoted_val)
        force_loopy_epilogue = true;
      if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
          /* If the main algorithm works on QImode, no epilogue is needed.
             For small sizes just don't align anything.  */
          if (size_needed == 1)
            desired_align = align;
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp),
                                   1, label);
          if (expected_size == -1 || expected_size <= epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
  if (dynamic_check != -1)
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                               LEU, 0, counter_mode (count_exp), 1,
                               hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
      if (align_bytes == 0)
          /* Except for the first move in the epilogue, we no longer know
             the constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          dst = change_address (dst, BLKmode, destreg);
          expand_setmem_prologue (dst, destreg, promoted_val, count_exp,
                                  align,
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (counter_mode (count_exp),
                                     count_exp, -align_bytes);
          count -= align_bytes;
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
          /* It is possible that we copied enough so the main loop will
             not execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp),
                                   1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
  if (label && size_needed == 1)
      emit_label (label);
      LABEL_NUSES (label) = 1;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

      gcc_unreachable ();
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, QImode, 1, expected_size);
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, word_mode, 1, expected_size);
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, word_mode, 4, expected_size);
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
  /* Properly adjust the offset of dst memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                        (count / size_needed) * size_needed);
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to set the remaining bytes.  */

  /* When the main loop is done, COUNT_EXP might hold the original count,
     while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
     Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
     bytes.  Compensate if needed.  */
      if (size_needed < epilogue_size_needed)
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
      emit_label (label);
      LABEL_NUSES (label) = 1;
  if (count_exp != const0_rtx && epilogue_size_needed > 1)
      if (force_loopy_epilogue)
        expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
                                         epilogue_size_needed);
        expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
                                epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above
   and some address computing at the end.  These things are done in
   i386.md.  */
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte boundary.  */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte boundary.  */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether it is aligned to 4 bytes.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2.  */
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,

          emit_insn (ix86_gen_add3 (out, out, const1_rtx));

          emit_label (align_3_label);

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

  /* Generate the loop to check 4 bytes at a time.  It is not a good idea
     to align this loop: it only makes programs huge and does not help
     to speed them up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
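  /* The test below computes (scratch - 0x01010101) & ~scratch
     & 0x80808080, which is nonzero exactly when some byte of SCRATCH is
     zero: the per-byte subtraction of 1 sets a byte's top bit on a wrap
     from zero, and ~scratch masks off bytes whose top bit was already
     set.  E.g. scratch = 0x61620063 gives 0x6060FF62 & 0x9E9DFF9C
     & 0x80808080 == 0x00008000, flagging the zero byte.  */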
23544 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
23545 emit_insn (gen_one_cmplsi2 (scratch, scratch));
23546 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
23547 emit_insn (gen_andsi3 (tmpreg, tmpreg,
23548 gen_int_mode (0x80808080, SImode)));
23549 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
23554 rtx reg = gen_reg_rtx (SImode);
23555 rtx reg2 = gen_reg_rtx (Pmode);
23556 emit_move_insn (reg, tmpreg);
23557 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
23559 /* If zero is not in the first two bytes, move two bytes forward. */
23560 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
23561 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
23562 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
23563 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
23564 gen_rtx_IF_THEN_ELSE (SImode, tmp,
23567 /* Emit lea manually to avoid clobbering of flags. */
23568 emit_insn (gen_rtx_SET (SImode, reg2,
23569 gen_rtx_PLUS (Pmode, out, const2_rtx)));
23571 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
23572 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
23573 emit_insn (gen_rtx_SET (VOIDmode, out,
23574 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
23580 rtx end_2_label = gen_label_rtx ();
23581 /* Is zero in the first two bytes? */
23583 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
23584 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
23585 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
23586 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
23587 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
23589 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
23590 JUMP_LABEL (tmp) = end_2_label;
23592 /* Not in the first two. Move two bytes forward. */
23593 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
23594 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
23596 emit_label (end_2_label);
23600 /* Avoid branch in fixing the byte. */
23601 tmpreg = gen_lowpart (QImode, tmpreg);
23602 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
23603 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
23604 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
23605 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
23607 emit_label (end_0_label);
23610 /* Expand strlen. */
23613 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
23615 rtx addr, scratch1, scratch2, scratch3, scratch4;
23617 /* The generic case of strlen expander is long. Avoid it's
23618 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
23620 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
23621 && !TARGET_INLINE_ALL_STRINGOPS
23622 && !optimize_insn_for_size_p ()
23623 && (!CONST_INT_P (align) || INTVAL (align) < 4))
23626 addr = force_reg (Pmode, XEXP (src, 0));
23627 scratch1 = gen_reg_rtx (Pmode);
23629 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
23630 && !optimize_insn_for_size_p ())
23632 /* Well it seems that some optimizer does not combine a call like
23633 foo(strlen(bar), strlen(bar));
23634 when the move and the subtraction is done here. It does calculate
23635 the length just once when these instructions are done inside of
23636 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
23637 often used and I use one fewer register for the lifetime of
23638 output_strlen_unroll() this is better. */
23640 emit_move_insn (out, addr);
23642 ix86_expand_strlensi_unroll_1 (out, src, align);
23644 /* strlensi_unroll_1 returns the address of the zero at the end of
23645 the string, like memchr(), so compute the length by subtracting
23646 the start address. */
23647 emit_insn (ix86_gen_sub3 (out, out, addr));
23653 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23654 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
23657 scratch2 = gen_reg_rtx (Pmode);
23658 scratch3 = gen_reg_rtx (Pmode);
23659 scratch4 = force_reg (Pmode, constm1_rtx);
23661 emit_move_insn (scratch3, addr);
23662 eoschar = force_reg (QImode, eoschar);
23664 src = replace_equiv_address_nv (src, scratch3);
23666 /* If .md starts supporting :P, this can be done in .md. */
23667 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
23668 scratch4), UNSPEC_SCAS);
23669 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
23670 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
23671 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
23676 /* For given symbol (function) construct code to compute address of it's PLT
23677 entry in large x86-64 PIC model. */
23679 construct_plt_address (rtx symbol)
23683 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
23684 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
23685 gcc_assert (Pmode == DImode);
23687 tmp = gen_reg_rtx (Pmode);
23688 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
23690 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
23691 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
23696 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
23698 rtx pop, bool sibcall)
23700 /* We need to represent that SI and DI registers are clobbered
23702 static int clobbered_registers[] = {
23703 XMM6_REG, XMM7_REG, XMM8_REG,
23704 XMM9_REG, XMM10_REG, XMM11_REG,
23705 XMM12_REG, XMM13_REG, XMM14_REG,
23706 XMM15_REG, SI_REG, DI_REG
23708 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
23709 rtx use = NULL, call;
23710 unsigned int vec_len;
23712 if (pop == const0_rtx)
23714 gcc_assert (!TARGET_64BIT || !pop);
23716 if (TARGET_MACHO && !TARGET_64BIT)
23719 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
23720 fnaddr = machopic_indirect_call_target (fnaddr);
23725 /* Static functions and indirect calls don't need the pic register. */
23726 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
23727 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23728 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
23729 use_reg (&use, pic_offset_table_rtx);
23732 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
23734 rtx al = gen_rtx_REG (QImode, AX_REG);
23735 emit_move_insn (al, callarg2);
23736 use_reg (&use, al);
23739 if (ix86_cmodel == CM_LARGE_PIC
23741 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23742 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
23743 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
23745 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
23746 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
23748 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
23749 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
23753 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
23755 call = gen_rtx_SET (VOIDmode, retval, call);
23756 vec[vec_len++] = call;
23760 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
23761 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
23762 vec[vec_len++] = pop;
23765 if (TARGET_64BIT_MS_ABI
23766 && (!callarg2 || INTVAL (callarg2) != -2))
23770 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
23771 UNSPEC_MS_TO_SYSV_CALL);
23773 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
23775 = gen_rtx_CLOBBER (VOIDmode,
23776 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
23778 clobbered_registers[i]));
23782 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
23783 call = emit_call_insn (call);
23785 CALL_INSN_FUNCTION_USAGE (call) = use;
23790 /* Output the assembly for a call instruction. */
23793 ix86_output_call_insn (rtx insn, rtx call_op)
23795 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
23796 bool seh_nop_p = false;
23799 if (SIBLING_CALL_P (insn))
23803 /* SEH epilogue detection requires the indirect branch case
23804 to include REX.W. */
23805 else if (TARGET_SEH)
23806 xasm = "rex.W jmp %A0";
23810 output_asm_insn (xasm, &call_op);
23814 /* SEH unwinding can require an extra nop to be emitted in several
23815 circumstances. Determine if we have one of those. */
23820 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
23822 /* If we get to another real insn, we don't need the nop. */
23826 /* If we get to the epilogue note, prevent a catch region from
23827 being adjacent to the standard epilogue sequence. If non-call
23828 exceptions are enabled, we'll have done this during epilogue emission. */
23829 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
23830 && !flag_non_call_exceptions
23831 && !can_throw_internal (insn))
23838 /* If we didn't find a real insn following the call, prevent the
23839 unwinder from looking into the next function. */
23845 xasm = "call\t%P0";
23847 xasm = "call\t%A0";
23849 output_asm_insn (xasm, &call_op);
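/* Editor's gloss (an assumption based on ix86_print_operand, not stated
   here): %P0 prints the call target as a direct symbol, e.g. "call foo",
   while %A0 prints the indirect form with a leading '*', e.g.
   "call *%rax" or "call *8(%rsp)".  */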
23857 /* Clear stack slot assignments remembered from previous functions.
23858 This is called from INIT_EXPANDERS once before RTL is emitted for each function. */
23861 static struct machine_function *
23862 ix86_init_machine_status (void)
23864 struct machine_function *f;
23866 f = ggc_alloc_cleared_machine_function ();
23867 f->use_fast_prologue_epilogue_nregs = -1;
23868 f->call_abi = ix86_abi;
23873 /* Return a MEM corresponding to a stack slot with mode MODE.
23874 Allocate a new slot if necessary.
23876 The RTL for a function can have several slots available: N is
23877 which slot to use. */
23880 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
23882 struct stack_local_entry *s;
23884 gcc_assert (n < MAX_386_STACK_LOCALS);
23886 for (s = ix86_stack_locals; s; s = s->next)
23887 if (s->mode == mode && s->n == n)
23888 return validize_mem (copy_rtx (s->rtl));
23890 s = ggc_alloc_stack_local_entry ();
23893 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
23895 s->next = ix86_stack_locals;
23896 ix86_stack_locals = s;
23897 return validize_mem (s->rtl);
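/* Editor's illustration of a typical use of the cache above (SLOT_TEMP is
   one of the enum ix86_stack_slot values):

       rtx mem = assign_386_stack_local (SImode, SLOT_TEMP);

   Repeated requests for the same (mode, n) pair return the same slot.  */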
23901 ix86_instantiate_decls (void)
23903 struct stack_local_entry *s;
23905 for (s = ix86_stack_locals; s; s = s->next)
23906 if (s->rtl != NULL_RTX)
23907 instantiate_decl_rtl (s->rtl);
23910 /* Calculate the length of the memory address in the instruction encoding.
23911 Includes the addr32 prefix; does not include the one-byte modrm, opcode,
23912 or other prefixes. We never generate an addr32 prefix for an LEA insn. */
23915 memory_address_length (rtx addr, bool lea)
23917 struct ix86_address parts;
23918 rtx base, index, disp;
23922 if (GET_CODE (addr) == PRE_DEC
23923 || GET_CODE (addr) == POST_INC
23924 || GET_CODE (addr) == PRE_MODIFY
23925 || GET_CODE (addr) == POST_MODIFY)
23928 ok = ix86_decompose_address (addr, &parts);
23931 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
23933 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
23934 if (TARGET_64BIT && !lea
23935 && (SImode_address_operand (addr, VOIDmode)
23936 || (parts.base && GET_MODE (parts.base) == SImode)
23937 || (parts.index && GET_MODE (parts.index) == SImode)))
23941 index = parts.index;
23944 if (base && GET_CODE (base) == SUBREG)
23945 base = SUBREG_REG (base);
23946 if (index && GET_CODE (index) == SUBREG)
23947 index = SUBREG_REG (index);
23949 gcc_assert (base == NULL_RTX || REG_P (base));
23950 gcc_assert (index == NULL_RTX || REG_P (index));
23953 - esp as the base always wants an index,
23954 - ebp as the base always wants a displacement,
23955 - r12 as the base always wants an index,
23956 - r13 as the base always wants a displacement. */
23958 /* Register Indirect. */
23959 if (base && !index && !disp)
23961 /* esp (for its index) and ebp (for its displacement) need
23962 the two-byte modrm form. Similarly for r12 and r13 in 64-bit mode.
23964 if (base == arg_pointer_rtx
23965 || base == frame_pointer_rtx
23966 || REGNO (base) == SP_REG
23967 || REGNO (base) == BP_REG
23968 || REGNO (base) == R12_REG
23969 || REGNO (base) == R13_REG)
23973 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23974 is not disp32, but disp32(%rip), so for disp32 a
23975 SIB byte is needed, unless print_operand_address
23976 optimizes it into disp32(%rip) or (%rip) is implied by UNSPEC. */
23978 else if (disp && !base && !index)
23985 if (GET_CODE (disp) == CONST)
23986 symbol = XEXP (disp, 0);
23987 if (GET_CODE (symbol) == PLUS
23988 && CONST_INT_P (XEXP (symbol, 1)))
23989 symbol = XEXP (symbol, 0);
23991 if (GET_CODE (symbol) != LABEL_REF
23992 && (GET_CODE (symbol) != SYMBOL_REF
23993 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
23994 && (GET_CODE (symbol) != UNSPEC
23995 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
23996 && XINT (symbol, 1) != UNSPEC_PCREL
23997 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
24003 /* Find the length of the displacement constant. */
24006 if (base && satisfies_constraint_K (disp))
24011 /* ebp always wants a displacement. Similarly r13. */
24012 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
24015 /* An index requires the two-byte modrm form.... */
24017 /* ...like esp (or r12), which always wants an index. */
24018 || base == arg_pointer_rtx
24019 || base == frame_pointer_rtx
24020 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
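/* Editor's worked example of the rules above (standard x86 encodings):
   "mov (%eax), %ecx" is 8b 08 (one-byte modrm, so len stays 0 here);
   "mov (%esp), %ecx" is 8b 0c 24 (modrm + SIB byte, len 1); and
   "mov (%ebp), %ecx" is 8b 4d 00 (modrm + disp8, len 1).  r12 and r13
   behave like esp and ebp with a REX prefix added.  */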
24027 /* Compute default value for "length_immediate" attribute. When SHORTFORM
24028 is set, expect that the insn has an 8-bit immediate alternative. */
24030 ix86_attr_length_immediate_default (rtx insn, bool shortform)
24034 extract_insn_cached (insn);
24035 for (i = recog_data.n_operands - 1; i >= 0; --i)
24036 if (CONSTANT_P (recog_data.operand[i]))
24038 enum attr_mode mode = get_attr_mode (insn);
24041 if (shortform && CONST_INT_P (recog_data.operand[i]))
24043 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
24050 ival = trunc_int_for_mode (ival, HImode);
24053 ival = trunc_int_for_mode (ival, SImode);
24058 if (IN_RANGE (ival, -128, 127))
24075 /* Immediates for DImode instructions are encoded
24076 as 32-bit sign-extended values. */
24081 fatal_insn ("unknown insn mode", insn);
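/* Editor's worked example: with SHORTFORM, "addl $100, %eax" can use the
   sign-extended imm8 encoding 83 c0 64 (length_immediate 1), while
   "addl $300, %eax" needs the imm32 form 05 2c 01 00 00
   (length_immediate 4), since 300 is outside [-128, 127].  */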
24087 /* Compute default value for "length_address" attribute. */
24089 ix86_attr_length_address_default (rtx insn)
24093 if (get_attr_type (insn) == TYPE_LEA)
24095 rtx set = PATTERN (insn), addr;
24097 if (GET_CODE (set) == PARALLEL)
24098 set = XVECEXP (set, 0, 0);
24100 gcc_assert (GET_CODE (set) == SET);
24102 addr = SET_SRC (set);
24104 return memory_address_length (addr, true);
24107 extract_insn_cached (insn);
24108 for (i = recog_data.n_operands - 1; i >= 0; --i)
24109 if (MEM_P (recog_data.operand[i]))
24111 constrain_operands_cached (reload_completed);
24112 if (which_alternative != -1)
24114 const char *constraints = recog_data.constraints[i];
24115 int alt = which_alternative;
24117 while (*constraints == '=' || *constraints == '+')
24120 while (*constraints++ != ',')
24122 /* Skip ignored operands. */
24123 if (*constraints == 'X')
24126 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
24131 /* Compute default value for "length_vex" attribute. It includes
24132 2 or 3 byte VEX prefix and 1 opcode byte. */
24135 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
24139 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W
24140 bit requires the 3-byte VEX prefix. */
24141 if (!has_0f_opcode || has_vex_w)
24144 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
24148 extract_insn_cached (insn);
24150 for (i = recog_data.n_operands - 1; i >= 0; --i)
24151 if (REG_P (recog_data.operand[i]))
24153 /* The REX.W bit requires the 3-byte VEX prefix. */
24154 if (GET_MODE (recog_data.operand[i]) == DImode
24155 && GENERAL_REG_P (recog_data.operand[i]))
24160 /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
24161 if (MEM_P (recog_data.operand[i])
24162 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
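/* Editor's illustration: "vaddps %xmm1, %xmm2, %xmm3" fits the 2-byte
   (C5) VEX prefix, giving 2 + 1 = 3 here; using %xmm8-%xmm15 as a base or
   index register (REX.X/REX.B), a DImode general register (REX.W), or a
   non-0f opcode map forces the 3-byte (C4) form, giving 3 + 1 = 4.  */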
24169 /* Return the maximum number of instructions a cpu can issue. */
24172 ix86_issue_rate (void)
24176 case PROCESSOR_PENTIUM:
24177 case PROCESSOR_ATOM:
24178 case PROCESSOR_SLM:
24180 case PROCESSOR_BTVER2:
24183 case PROCESSOR_PENTIUMPRO:
24184 case PROCESSOR_PENTIUM4:
24185 case PROCESSOR_CORE2:
24186 case PROCESSOR_COREI7:
24187 case PROCESSOR_HASWELL:
24188 case PROCESSOR_ATHLON:
24190 case PROCESSOR_AMDFAM10:
24191 case PROCESSOR_NOCONA:
24192 case PROCESSOR_GENERIC32:
24193 case PROCESSOR_GENERIC64:
24194 case PROCESSOR_BDVER1:
24195 case PROCESSOR_BDVER2:
24196 case PROCESSOR_BDVER3:
24197 case PROCESSOR_BTVER1:
24205 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24206 by DEP_INSN and nothing else set by DEP_INSN. */
24209 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
24213 /* Simplify the test for uninteresting insns. */
24214 if (insn_type != TYPE_SETCC
24215 && insn_type != TYPE_ICMOV
24216 && insn_type != TYPE_FCMOV
24217 && insn_type != TYPE_IBR)
24220 if ((set = single_set (dep_insn)) != 0)
24222 set = SET_DEST (set);
24225 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
24226 && XVECLEN (PATTERN (dep_insn), 0) == 2
24227 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
24228 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
24230 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
24231 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
24236 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
24239 /* This test is true if the dependent insn reads the flags but
24240 not any other potentially set register. */
24241 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
24244 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
24250 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN. */
24254 ix86_agi_dependent (rtx set_insn, rtx use_insn)
24257 extract_insn_cached (use_insn);
24258 for (i = recog_data.n_operands - 1; i >= 0; --i)
24259 if (MEM_P (recog_data.operand[i]))
24261 rtx addr = XEXP (recog_data.operand[i], 0);
24262 return modified_in_p (addr, set_insn) != 0;
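/* Editor's illustration of the AGI case tested above: on Pentium,

       addl $4, %ebx
       movl (%ebx), %eax

   costs an extra cycle because the load's address depends on the
   immediately preceding ALU result (an Address Generation Interlock).  */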
24267 /* Helper function for exact_store_load_dependency.
24268 Return true if ADDR is found in INSN. */
24270 exact_dependency_1 (rtx addr, rtx insn)
24272 enum rtx_code code;
24273 const char *format_ptr;
24276 code = GET_CODE (insn);
24280 if (rtx_equal_p (addr, insn))
24295 format_ptr = GET_RTX_FORMAT (code);
24296 for (i = 0; i < GET_RTX_LENGTH (code); i++)
24298 switch (*format_ptr++)
24301 if (exact_dependency_1 (addr, XEXP (insn, i)))
24305 for (j = 0; j < XVECLEN (insn, i); j++)
24306 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
24314 /* Return true if an exact dependency exists between a store and a load, i.e.
24315 the same memory address is used in both. */
24317 exact_store_load_dependency (rtx store, rtx load)
24321 set1 = single_set (store);
24324 if (!MEM_P (SET_DEST (set1)))
24326 set2 = single_set (load);
24329 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
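/* Editor's illustration: a pair such as

       movw %ax, 8(%esp)      ; HImode store
       movl 8(%esp), %ecx     ; load from the same address

   is an exact store/load dependency; the Silvermont tuning below raises
   its cost to model the store-forwarding stall.  */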
24335 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
24337 enum attr_type insn_type, dep_insn_type;
24338 enum attr_memory memory;
24340 int dep_insn_code_number;
24342 /* Anti and output dependencies have zero cost on all CPUs. */
24343 if (REG_NOTE_KIND (link) != 0)
24346 dep_insn_code_number = recog_memoized (dep_insn);
24348 /* If we can't recognize the insns, we can't really do anything. */
24349 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
24352 insn_type = get_attr_type (insn);
24353 dep_insn_type = get_attr_type (dep_insn);
24357 case PROCESSOR_PENTIUM:
24358 /* Address Generation Interlock adds a cycle of latency. */
24359 if (insn_type == TYPE_LEA)
24361 rtx addr = PATTERN (insn);
24363 if (GET_CODE (addr) == PARALLEL)
24364 addr = XVECEXP (addr, 0, 0);
24366 gcc_assert (GET_CODE (addr) == SET);
24368 addr = SET_SRC (addr);
24369 if (modified_in_p (addr, dep_insn))
24372 else if (ix86_agi_dependent (dep_insn, insn))
24375 /* ??? Compares pair with jump/setcc. */
24376 if (ix86_flags_dependent (insn, dep_insn, insn_type))
24379 /* Floating point stores require value to be ready one cycle earlier. */
24380 if (insn_type == TYPE_FMOV
24381 && get_attr_memory (insn) == MEMORY_STORE
24382 && !ix86_agi_dependent (dep_insn, insn))
24386 case PROCESSOR_PENTIUMPRO:
24387 memory = get_attr_memory (insn);
24389 /* INT->FP conversion is expensive. */
24390 if (get_attr_fp_int_src (dep_insn))
24393 /* There is one cycle extra latency between an FP op and a store. */
24394 if (insn_type == TYPE_FMOV
24395 && (set = single_set (dep_insn)) != NULL_RTX
24396 && (set2 = single_set (insn)) != NULL_RTX
24397 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
24398 && MEM_P (SET_DEST (set2)))
24401 /* Model the reorder buffer's ability to hide the latency of a load by
24402 executing it in parallel with the previous instruction, in case the
24403 previous instruction is not needed to compute the address. */
24404 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
24405 && !ix86_agi_dependent (dep_insn, insn))
24407 /* Claim moves to take one cycle, as the core can issue one load
24408 at a time and the next load can start a cycle later. */
24409 if (dep_insn_type == TYPE_IMOV
24410 || dep_insn_type == TYPE_FMOV)
24418 memory = get_attr_memory (insn);
24420 /* The esp dependency is resolved before the instruction is really finished. */
24422 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
24423 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
24426 /* INT->FP conversion is expensive. */
24427 if (get_attr_fp_int_src (dep_insn))
24430 /* Model the reorder buffer's ability to hide the latency of a load by
24431 executing it in parallel with the previous instruction, in case the
24432 previous instruction is not needed to compute the address. */
24433 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
24434 && !ix86_agi_dependent (dep_insn, insn))
24436 /* Claim moves to take one cycle, as the core can issue one load
24437 at a time and the next load can start a cycle later. */
24438 if (dep_insn_type == TYPE_IMOV
24439 || dep_insn_type == TYPE_FMOV)
24448 case PROCESSOR_ATHLON:
24450 case PROCESSOR_AMDFAM10:
24451 case PROCESSOR_BDVER1:
24452 case PROCESSOR_BDVER2:
24453 case PROCESSOR_BDVER3:
24454 case PROCESSOR_BTVER1:
24455 case PROCESSOR_BTVER2:
24456 case PROCESSOR_ATOM:
24457 case PROCESSOR_GENERIC32:
24458 case PROCESSOR_GENERIC64:
24459 memory = get_attr_memory (insn);
24461 /* Model the reorder buffer's ability to hide the latency of a load by
24462 executing it in parallel with the previous instruction, in case the
24463 previous instruction is not needed to compute the address. */
24464 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
24465 && !ix86_agi_dependent (dep_insn, insn))
24467 enum attr_unit unit = get_attr_unit (insn);
24470 /* Because of the difference between the length of integer and
24471 floating unit pipeline preparation stages, the memory operands
24472 for floating point are cheaper.
24474 ??? For Athlon the difference is most probably 2. */
24475 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
24478 loadcost = TARGET_ATHLON ? 2 : 0;
24480 if (cost >= loadcost)
24487 case PROCESSOR_SLM:
24488 if (!reload_completed)
24491 /* Increase cost of integer loads. */
24492 memory = get_attr_memory (dep_insn);
24493 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
24495 enum attr_unit unit = get_attr_unit (dep_insn);
24496 if (unit == UNIT_INTEGER && cost == 1)
24498 if (memory == MEMORY_LOAD)
24502 /* Increase cost of ld/st for short int types only
24503 because of a store-forwarding issue. */
24504 rtx set = single_set (dep_insn);
24505 if (set && (GET_MODE (SET_DEST (set)) == QImode
24506 || GET_MODE (SET_DEST (set)) == HImode))
24508 /* Increase the cost of the store/load insn if an exact
24509 dependence exists and the dependent insn is the load. */
24510 enum attr_memory insn_memory = get_attr_memory (insn);
24511 if (insn_memory == MEMORY_LOAD
24512 && exact_store_load_dependency (dep_insn, insn))
24526 /* How many alternative schedules to try. This should be as wide as the
24527 scheduling freedom in the DFA, but no wider. Making this value too
24528 large results in extra work for the scheduler. */
24531 ia32_multipass_dfa_lookahead (void)
24535 case PROCESSOR_PENTIUM:
24538 case PROCESSOR_PENTIUMPRO:
24542 case PROCESSOR_CORE2:
24543 case PROCESSOR_COREI7:
24544 case PROCESSOR_HASWELL:
24545 case PROCESSOR_ATOM:
24546 case PROCESSOR_SLM:
24547 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24548 as the number of instructions that can be executed in a cycle, i.e.,
24549 issue_rate. I wonder why tuning for many CPUs does not do this. */
24550 if (reload_completed)
24551 return ix86_issue_rate ();
24552 /* Don't use lookahead for pre-reload schedule to save compile time. */
24560 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
24561 execution. It is applied if
24562 (1) an IMUL instruction is on top of the list;
24563 (2) there is exactly one producer of an independent IMUL instruction in
24564 the ready list.
24565 Return the index of the IMUL producer if it was found, and -1 otherwise. */
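/* Editor's sketch of the intent (illustrative, not generated output): with

       imull %ebx, %eax          <- on top of the ready list
       movl  8(%esp), %esi       <- producer of the next imul
       imull %edi, %esi

   hoisting the producer lets the two multiplies overlap in Atom's
   pipelined IMUL unit.  */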
24567 do_reorder_for_imul (rtx *ready, int n_ready)
24569 rtx insn, set, insn1, insn2;
24570 sd_iterator_def sd_it;
24575 if (ix86_tune != PROCESSOR_ATOM)
24578 /* Check that IMUL instruction is on the top of ready list. */
24579 insn = ready[n_ready - 1];
24580 set = single_set (insn);
24583 if (!(GET_CODE (SET_SRC (set)) == MULT
24584 && GET_MODE (SET_SRC (set)) == SImode))
24587 /* Search for producer of independent IMUL instruction. */
24588 for (i = n_ready - 2; i >= 0; i--)
24591 if (!NONDEBUG_INSN_P (insn))
24593 /* Skip IMUL instruction. */
24594 insn2 = PATTERN (insn);
24595 if (GET_CODE (insn2) == PARALLEL)
24596 insn2 = XVECEXP (insn2, 0, 0);
24597 if (GET_CODE (insn2) == SET
24598 && GET_CODE (SET_SRC (insn2)) == MULT
24599 && GET_MODE (SET_SRC (insn2)) == SImode)
24602 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
24605 con = DEP_CON (dep);
24606 if (!NONDEBUG_INSN_P (con))
24608 insn1 = PATTERN (con);
24609 if (GET_CODE (insn1) == PARALLEL)
24610 insn1 = XVECEXP (insn1, 0, 0);
24612 if (GET_CODE (insn1) == SET
24613 && GET_CODE (SET_SRC (insn1)) == MULT
24614 && GET_MODE (SET_SRC (insn1)) == SImode)
24616 sd_iterator_def sd_it1;
24618 /* Check if there is no other dependee for IMUL. */
24620 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
24623 pro = DEP_PRO (dep1);
24624 if (!NONDEBUG_INSN_P (pro))
24639 /* Try to find the best candidate for the top of the ready list if two
24640 insns have the same priority - the better candidate is the one whose
24641 dependees were scheduled earlier. Applied to Silvermont only.
24642 Return true if the top 2 insns must be interchanged. */
24644 swap_top_of_ready_list (rtx *ready, int n_ready)
24646 rtx top = ready[n_ready - 1];
24647 rtx next = ready[n_ready - 2];
24649 sd_iterator_def sd_it;
24653 #define INSN_TICK(INSN) (HID (INSN)->tick)
24655 if (ix86_tune != PROCESSOR_SLM)
24658 if (!NONDEBUG_INSN_P (top))
24660 if (!NONJUMP_INSN_P (top))
24662 if (!NONDEBUG_INSN_P (next))
24664 if (!NONJUMP_INSN_P (next))
24666 set = single_set (top);
24669 set = single_set (next);
24673 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
24675 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
24677 /* Determine the winner more precisely. */
24678 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
24681 pro = DEP_PRO (dep);
24682 if (!NONDEBUG_INSN_P (pro))
24684 if (INSN_TICK (pro) > clock1)
24685 clock1 = INSN_TICK (pro);
24687 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
24690 pro = DEP_PRO (dep);
24691 if (!NONDEBUG_INSN_P (pro))
24693 if (INSN_TICK (pro) > clock2)
24694 clock2 = INSN_TICK (pro);
24697 if (clock1 == clock2)
24699 /* Determine winner - load must win. */
24700 enum attr_memory memory1, memory2;
24701 memory1 = get_attr_memory (top);
24702 memory2 = get_attr_memory (next);
24703 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
24706 return (bool) (clock2 < clock1);
24712 /* Perform possible reordering of the ready list, for Atom/Silvermont only.
24713 Return issue rate. */
24715 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
24718 int issue_rate = -1;
24719 int n_ready = *pn_ready;
24724 /* Set up issue rate. */
24725 issue_rate = ix86_issue_rate ();
24727 /* Do reordering for Atom/SLM only. */
24728 if (ix86_tune != PROCESSOR_ATOM && ix86_tune != PROCESSOR_SLM)
24731 /* Nothing to do if ready list contains only 1 instruction. */
24735 /* Do reordering for the post-reload scheduler only. */
24736 if (!reload_completed)
24739 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
24741 if (sched_verbose > 1)
24742 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
24743 INSN_UID (ready[index]));
24745 /* Put IMUL producer (ready[index]) at the top of ready list. */
24746 insn = ready[index];
24747 for (i = index; i < n_ready - 1; i++)
24748 ready[i] = ready[i + 1];
24749 ready[n_ready - 1] = insn;
24752 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
24754 if (sched_verbose > 1)
24755 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
24756 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
24757 /* Swap 2 top elements of ready list. */
24758 insn = ready[n_ready - 1];
24759 ready[n_ready - 1] = ready[n_ready - 2];
24760 ready[n_ready - 2] = insn;
24766 ix86_class_likely_spilled_p (reg_class_t);
24768 /* Return true if the LHS of INSN is a HW function argument register; set
24769 *is_spilled to true if it is a likely-spilled HW register. */
24771 insn_is_function_arg (rtx insn, bool* is_spilled)
24775 if (!NONDEBUG_INSN_P (insn))
24777 /* Call instructions are not movable; ignore them. */
24780 insn = PATTERN (insn);
24781 if (GET_CODE (insn) == PARALLEL)
24782 insn = XVECEXP (insn, 0, 0);
24783 if (GET_CODE (insn) != SET)
24785 dst = SET_DEST (insn);
24786 if (REG_P (dst) && HARD_REGISTER_P (dst)
24787 && ix86_function_arg_regno_p (REGNO (dst)))
24789 /* Is it likely spilled HW register? */
24790 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
24791 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
24792 *is_spilled = true;
24798 /* Add output dependencies for a chain of adjacent function arguments, but
24799 only if there is a move to a likely-spilled HW register. Return the first
24800 argument if at least one dependence was added, or NULL otherwise. */
24802 add_parameter_dependencies (rtx call, rtx head)
24806 rtx first_arg = NULL;
24807 bool is_spilled = false;
24809 head = PREV_INSN (head);
24811 /* Find the argument-passing instruction nearest to the call. */
24814 last = PREV_INSN (last);
24817 if (!NONDEBUG_INSN_P (last))
24819 if (insn_is_function_arg (last, &is_spilled))
24827 insn = PREV_INSN (last);
24828 if (!INSN_P (insn))
24832 if (!NONDEBUG_INSN_P (insn))
24837 if (insn_is_function_arg (insn, &is_spilled))
24839 /* Add an output dependence between two function arguments if the chain
24840 of output arguments contains likely-spilled HW registers. */
24842 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
24843 first_arg = last = insn;
24853 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict its code motion. */
24856 avoid_func_arg_motion (rtx first_arg, rtx insn)
24861 set = single_set (insn);
24864 tmp = SET_DEST (set);
24867 /* Add output dependency to the first function argument. */
24868 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
24871 /* Add anti dependency. */
24872 add_dependence (first_arg, insn, REG_DEP_ANTI);
24875 /* Avoid cross-block motion of a function argument by adding a dependency
24876 from the first non-jump instruction in bb. */
24878 add_dependee_for_func_arg (rtx arg, basic_block bb)
24880 rtx insn = BB_END (bb);
24884 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
24886 rtx set = single_set (insn);
24889 avoid_func_arg_motion (arg, insn);
24893 if (insn == BB_HEAD (bb))
24895 insn = PREV_INSN (insn);
24899 /* Hook for pre-reload schedule - avoid motion of function arguments
24900 passed in likely spilled HW registers. */
24902 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
24905 rtx first_arg = NULL;
24906 if (reload_completed)
24908 while (head != tail && DEBUG_INSN_P (head))
24909 head = NEXT_INSN (head);
24910 for (insn = tail; insn != head; insn = PREV_INSN (insn))
24911 if (INSN_P (insn) && CALL_P (insn))
24913 first_arg = add_parameter_dependencies (insn, head);
24916 /* Add a dependee for the first argument to predecessors, but only if the
24917 region contains more than one block. */
24918 basic_block bb = BLOCK_FOR_INSN (insn);
24919 int rgn = CONTAINING_RGN (bb->index);
24920 int nr_blks = RGN_NR_BLOCKS (rgn);
24921 /* Skip trivial regions and region head blocks that can have
24922 predecessors outside of region. */
24923 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
24927 /* Assume that region is SCC, i.e. all immediate predecessors
24928 of non-head block are in the same region. */
24929 FOR_EACH_EDGE (e, ei, bb->preds)
24931 /* Avoid creating loop-carried dependencies by
24932 using the topological ordering of the region. */
24933 if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
24934 add_dependee_for_func_arg (first_arg, e->src);
24942 else if (first_arg)
24943 avoid_func_arg_motion (first_arg, insn);
24946 /* Hook for the pre-reload scheduler - set the priority of moves from
24947 likely-spilled HW registers to the maximum, to schedule them as soon as
24948 possible. These are moves from function argument registers at the top of
24949 the function entry and moves from function return value registers after a call. */
24951 ix86_adjust_priority (rtx insn, int priority)
24955 if (reload_completed)
24958 if (!NONDEBUG_INSN_P (insn))
24961 set = single_set (insn);
24964 rtx tmp = SET_SRC (set);
24966 && HARD_REGISTER_P (tmp)
24967 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
24968 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
24969 return current_sched_info->sched_max_insns_priority;
24975 /* Model the decoder of Core 2/i7.
24976 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
24977 track the instruction fetch block boundaries and make sure that long
24978 (9+ byte) instructions are assigned to D0. */
24980 /* Maximum length of an insn that can be handled by
24981 a secondary decoder unit. '8' for Core 2/i7. */
24982 static int core2i7_secondary_decoder_max_insn_size;
24984 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
24985 '16' for Core 2/i7. */
24986 static int core2i7_ifetch_block_size;
24988 /* Maximum number of instructions decoder can handle per cycle.
24989 '6' for Core 2/i7. */
24990 static int core2i7_ifetch_block_max_insns;
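/* Editor's worked example of the parameters above: with a 16-byte fetch
   block and insns of sizes 6, 6 and 5 in the ready list, the first two
   fit (6 + 6 <= 16) but the third is filtered out for this cycle
   (6 + 6 + 5 > 16); and an insn of 9+ bytes is only accepted as the
   first insn of a cycle, so it lands on decoder D0.  */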
24992 typedef struct ix86_first_cycle_multipass_data_ *
24993 ix86_first_cycle_multipass_data_t;
24994 typedef const struct ix86_first_cycle_multipass_data_ *
24995 const_ix86_first_cycle_multipass_data_t;
24997 /* A variable to store target state across calls to max_issue within
24999 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
25000 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
25002 /* Initialize DATA. */
25004 core2i7_first_cycle_multipass_init (void *_data)
25006 ix86_first_cycle_multipass_data_t data
25007 = (ix86_first_cycle_multipass_data_t) _data;
25009 data->ifetch_block_len = 0;
25010 data->ifetch_block_n_insns = 0;
25011 data->ready_try_change = NULL;
25012 data->ready_try_change_size = 0;
25015 /* Advancing the cycle; reset ifetch block counts. */
25017 core2i7_dfa_post_advance_cycle (void)
25019 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
25021 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
25023 data->ifetch_block_len = 0;
25024 data->ifetch_block_n_insns = 0;
25027 static int min_insn_size (rtx);
25029 /* Filter out insns from ready_try that the core will not be able to issue
25030 on the current cycle due to decoder restrictions. */
25032 core2i7_first_cycle_multipass_filter_ready_try
25033 (const_ix86_first_cycle_multipass_data_t data,
25034 char *ready_try, int n_ready, bool first_cycle_insn_p)
25041 if (ready_try[n_ready])
25044 insn = get_ready_element (n_ready);
25045 insn_size = min_insn_size (insn);
25047 if (/* If this insn is too long for a secondary decoder ... */
25048 (!first_cycle_insn_p
25049 && insn_size > core2i7_secondary_decoder_max_insn_size)
25050 /* ... or it would not fit into the ifetch block ... */
25051 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
25052 /* ... or the decoder is full already ... */
25053 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
25054 /* ... mask the insn out. */
25056 ready_try[n_ready] = 1;
25058 if (data->ready_try_change)
25059 bitmap_set_bit (data->ready_try_change, n_ready);
25064 /* Prepare for a new round of multipass lookahead scheduling. */
25066 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
25067 bool first_cycle_insn_p)
25069 ix86_first_cycle_multipass_data_t data
25070 = (ix86_first_cycle_multipass_data_t) _data;
25071 const_ix86_first_cycle_multipass_data_t prev_data
25072 = ix86_first_cycle_multipass_data;
25074 /* Restore the state from the end of the previous round. */
25075 data->ifetch_block_len = prev_data->ifetch_block_len;
25076 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
25078 /* Filter instructions that cannot be issued on current cycle due to
25079 decoder restrictions. */
25080 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
25081 first_cycle_insn_p);
25084 /* INSN is being issued in the current solution. Account for its impact on
25085 the decoder model. */
25087 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
25088 rtx insn, const void *_prev_data)
25090 ix86_first_cycle_multipass_data_t data
25091 = (ix86_first_cycle_multipass_data_t) _data;
25092 const_ix86_first_cycle_multipass_data_t prev_data
25093 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
25095 int insn_size = min_insn_size (insn);
25097 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
25098 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
25099 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
25100 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
25102 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
25103 if (!data->ready_try_change)
25105 data->ready_try_change = sbitmap_alloc (n_ready);
25106 data->ready_try_change_size = n_ready;
25108 else if (data->ready_try_change_size < n_ready)
25110 data->ready_try_change = sbitmap_resize (data->ready_try_change,
25112 data->ready_try_change_size = n_ready;
25114 bitmap_clear (data->ready_try_change);
25116 /* Filter out insns from ready_try that the core will not be able to issue
25117 on the current cycle due to decoder restrictions. */
25118 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
25122 /* Revert the effect on ready_try. */
25124 core2i7_first_cycle_multipass_backtrack (const void *_data,
25126 int n_ready ATTRIBUTE_UNUSED)
25128 const_ix86_first_cycle_multipass_data_t data
25129 = (const_ix86_first_cycle_multipass_data_t) _data;
25130 unsigned int i = 0;
25131 sbitmap_iterator sbi;
25133 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
25134 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
25140 /* Save the result of multipass lookahead scheduling for the next round. */
25142 core2i7_first_cycle_multipass_end (const void *_data)
25144 const_ix86_first_cycle_multipass_data_t data
25145 = (const_ix86_first_cycle_multipass_data_t) _data;
25146 ix86_first_cycle_multipass_data_t next_data
25147 = ix86_first_cycle_multipass_data;
25151 next_data->ifetch_block_len = data->ifetch_block_len;
25152 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
25156 /* Deallocate target data. */
25158 core2i7_first_cycle_multipass_fini (void *_data)
25160 ix86_first_cycle_multipass_data_t data
25161 = (ix86_first_cycle_multipass_data_t) _data;
25163 if (data->ready_try_change)
25165 sbitmap_free (data->ready_try_change);
25166 data->ready_try_change = NULL;
25167 data->ready_try_change_size = 0;
25171 /* Prepare for scheduling pass. */
25173 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
25174 int verbose ATTRIBUTE_UNUSED,
25175 int max_uid ATTRIBUTE_UNUSED)
25177 /* Install scheduling hooks for current CPU. Some of these hooks are used
25178 in time-critical parts of the scheduler, so we only set them up when
25179 they are actually used. */
25182 case PROCESSOR_CORE2:
25183 case PROCESSOR_COREI7:
25184 case PROCESSOR_HASWELL:
25185 /* Do not perform multipass scheduling for pre-reload schedule
25186 to save compile time. */
25187 if (reload_completed)
25189 targetm.sched.dfa_post_advance_cycle
25190 = core2i7_dfa_post_advance_cycle;
25191 targetm.sched.first_cycle_multipass_init
25192 = core2i7_first_cycle_multipass_init;
25193 targetm.sched.first_cycle_multipass_begin
25194 = core2i7_first_cycle_multipass_begin;
25195 targetm.sched.first_cycle_multipass_issue
25196 = core2i7_first_cycle_multipass_issue;
25197 targetm.sched.first_cycle_multipass_backtrack
25198 = core2i7_first_cycle_multipass_backtrack;
25199 targetm.sched.first_cycle_multipass_end
25200 = core2i7_first_cycle_multipass_end;
25201 targetm.sched.first_cycle_multipass_fini
25202 = core2i7_first_cycle_multipass_fini;
25204 /* Set decoder parameters. */
25205 core2i7_secondary_decoder_max_insn_size = 8;
25206 core2i7_ifetch_block_size = 16;
25207 core2i7_ifetch_block_max_insns = 6;
25210 /* ... Fall through ... */
25212 targetm.sched.dfa_post_advance_cycle = NULL;
25213 targetm.sched.first_cycle_multipass_init = NULL;
25214 targetm.sched.first_cycle_multipass_begin = NULL;
25215 targetm.sched.first_cycle_multipass_issue = NULL;
25216 targetm.sched.first_cycle_multipass_backtrack = NULL;
25217 targetm.sched.first_cycle_multipass_end = NULL;
25218 targetm.sched.first_cycle_multipass_fini = NULL;
25224 /* Compute the alignment given to a constant that is being placed in memory.
25225 EXP is the constant and ALIGN is the alignment that the object would
25226 ordinarily have.
25227 The value of this function is used instead of that alignment to align
25228 the object. */
25231 ix86_constant_alignment (tree exp, int align)
25233 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
25234 || TREE_CODE (exp) == INTEGER_CST)
25236 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
25238 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
25241 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
25242 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
25243 return BITS_PER_WORD;
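/* Editor's illustration (hypothetical user code): a constant such as

       static const double d = 3.14;

   is given 64-bit alignment here even if DFmode's default alignment is
   lower, and a string constant of 31 bytes or more is word-aligned when
   not optimizing for size.  */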
25248 /* Compute the alignment for a static variable.
25249 TYPE is the data type, and ALIGN is the alignment that
25250 the object would ordinarily have. The value of this function is used
25251 instead of that alignment to align the object. */
25254 ix86_data_alignment (tree type, int align)
25256 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
25258 if (AGGREGATE_TYPE_P (type)
25259 && TYPE_SIZE (type)
25260 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
25261 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
25262 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
25263 && align < max_align)
25266 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
25267 to a 16-byte boundary. */
25270 if (AGGREGATE_TYPE_P (type)
25271 && TYPE_SIZE (type)
25272 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
25273 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
25274 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
25278 if (TREE_CODE (type) == ARRAY_TYPE)
25280 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
25282 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
25285 else if (TREE_CODE (type) == COMPLEX_TYPE)
25288 if (TYPE_MODE (type) == DCmode && align < 64)
25290 if ((TYPE_MODE (type) == XCmode
25291 || TYPE_MODE (type) == TCmode) && align < 128)
25294 else if ((TREE_CODE (type) == RECORD_TYPE
25295 || TREE_CODE (type) == UNION_TYPE
25296 || TREE_CODE (type) == QUAL_UNION_TYPE)
25297 && TYPE_FIELDS (type))
25299 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
25301 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
25304 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
25305 || TREE_CODE (type) == INTEGER_TYPE)
25307 if (TYPE_MODE (type) == DFmode && align < 64)
25309 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
25316 /* Compute the alignment for a local variable or a stack slot. EXP is
25317 the data type or decl itself, MODE is the widest mode available and
25318 ALIGN is the alignment that the object would ordinarily have. The
25319 value of this macro is used instead of that alignment to align the object. */
25323 ix86_local_alignment (tree exp, enum machine_mode mode,
25324 unsigned int align)
25328 if (exp && DECL_P (exp))
25330 type = TREE_TYPE (exp);
25339 /* Don't do dynamic stack realignment for long long objects with
25340 -mpreferred-stack-boundary=2. */
25343 && ix86_preferred_stack_boundary < 64
25344 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
25345 && (!type || !TYPE_USER_ALIGN (type))
25346 && (!decl || !DECL_USER_ALIGN (decl)))
25349 /* If TYPE is NULL, we are allocating a stack slot for caller-save
25350 register in MODE. We will return the largest alignment of XF and DF. */
25354 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
25355 align = GET_MODE_ALIGNMENT (DFmode);
25359 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
25360 to a 16-byte boundary. The exact wording is:
25362 An array uses the same alignment as its elements, except that a local or
25363 global array variable of length at least 16 bytes or
25364 a C99 variable-length array variable always has alignment of at least 16 bytes.
25366 This was added to allow use of aligned SSE instructions on arrays. The
25367 rule is meant for static storage (where the compiler cannot do the analysis
25368 by itself). We follow it for automatic variables only when convenient.
25369 We fully control everything in the function being compiled, and functions
25370 from other units cannot rely on the alignment.
25372 Exclude the va_list type. It is the common case of a local array where
25373 we cannot benefit from the alignment. */
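/* Editor's illustration (hypothetical user code): under the rule above, a
   local

       double buf[4];            (32 bytes)

   gets 128-bit alignment on x86-64 when optimizing for speed, so SSE code
   can use aligned accesses (movaps/movapd) on it.  */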
25374 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
25377 if (AGGREGATE_TYPE_P (type)
25378 && (va_list_type_node == NULL_TREE
25379 || (TYPE_MAIN_VARIANT (type)
25380 != TYPE_MAIN_VARIANT (va_list_type_node)))
25381 && TYPE_SIZE (type)
25382 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
25383 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
25384 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
25387 if (TREE_CODE (type) == ARRAY_TYPE)
25389 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
25391 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
25394 else if (TREE_CODE (type) == COMPLEX_TYPE)
25396 if (TYPE_MODE (type) == DCmode && align < 64)
25398 if ((TYPE_MODE (type) == XCmode
25399 || TYPE_MODE (type) == TCmode) && align < 128)
25402 else if ((TREE_CODE (type) == RECORD_TYPE
25403 || TREE_CODE (type) == UNION_TYPE
25404 || TREE_CODE (type) == QUAL_UNION_TYPE)
25405 && TYPE_FIELDS (type))
25407 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
25409 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
25412 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
25413 || TREE_CODE (type) == INTEGER_TYPE)
25416 if (TYPE_MODE (type) == DFmode && align < 64)
25418 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
25424 /* Compute the minimum required alignment for dynamic stack realignment
25425 purposes for a local variable, parameter or a stack slot. EXP is
25426 the data type or decl itself, MODE is its mode and ALIGN is the
25427 alignment that the object would ordinarily have. */
25430 ix86_minimum_alignment (tree exp, enum machine_mode mode,
25431 unsigned int align)
25435 if (exp && DECL_P (exp))
25437 type = TREE_TYPE (exp);
25446 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
25449 /* Don't do dynamic stack realignment for long long objects with
25450 -mpreferred-stack-boundary=2. */
25451 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
25452 && (!type || !TYPE_USER_ALIGN (type))
25453 && (!decl || !DECL_USER_ALIGN (decl)))
25459 /* Find a location for the static chain incoming to a nested function.
25460 This is a register, unless all free registers are used by arguments. */
25463 ix86_static_chain (const_tree fndecl, bool incoming_p)
25467 if (!DECL_STATIC_CHAIN (fndecl))
25472 /* We always use R10 in 64-bit mode. */
25480 /* By default in 32-bit mode we use ECX to pass the static chain. */
25483 fntype = TREE_TYPE (fndecl);
25484 ccvt = ix86_get_callcvt (fntype);
25485 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
25487 /* Fastcall functions use ecx/edx for arguments, which leaves
25488 us with EAX for the static chain.
25489 Thiscall functions use ecx for arguments, which also
25490 leaves us with EAX for the static chain. */
25493 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
25495 /* Thiscall functions use ecx for arguments, which leaves
25496 us with EAX and EDX for the static chain.
25497 For ABI compatibility we use EAX. */
25500 else if (ix86_function_regparm (fntype, fndecl) == 3)
25502 /* For regparm 3, we have no free call-clobbered registers in
25503 which to store the static chain. In order to implement this,
25504 we have the trampoline push the static chain to the stack.
25505 However, we can't push a value below the return address when
25506 we call the nested function directly, so we have to use an
25507 alternate entry point. For this we use ESI, and have the
25508 alternate entry point push ESI, so that things appear the
25509 same once we're executing the nested function. */
25512 if (fndecl == current_function_decl)
25513 ix86_static_chain_on_stack = true;
25514 return gen_frame_mem (SImode,
25515 plus_constant (Pmode,
25516 arg_pointer_rtx, -8));
25522 return gen_rtx_REG (Pmode, regno);
25525 /* Emit RTL insns to initialize the variable parts of a trampoline.
25526 FNDECL is the decl of the target address; M_TRAMP is a MEM for
25527 the trampoline, and CHAIN_VALUE is an RTX for the static chain
25528 to be passed to the target function. */
25531 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
25537 fnaddr = XEXP (DECL_RTL (fndecl), 0);
25543 /* Load the function address to r11. Try to load address using
25544 the shorter movl instead of movabs. We may want to support
25545 movq for kernel mode, but the kernel does not use trampolines at
25546 the moment. FNADDR is a 32-bit address and may not be in
25547 DImode when ptr_mode == SImode. Always use movl in this case. */
25549 if (ptr_mode == SImode
25550 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
25552 fnaddr = copy_addr_to_reg (fnaddr);
25554 mem = adjust_address (m_tramp, HImode, offset);
25555 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
25557 mem = adjust_address (m_tramp, SImode, offset + 2);
25558 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
25563 mem = adjust_address (m_tramp, HImode, offset);
25564 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
25566 mem = adjust_address (m_tramp, DImode, offset + 2);
25567 emit_move_insn (mem, fnaddr);
25571 /* Load static chain using movabs to r10. Use the shorter movl
25572 instead of movabs when ptr_mode == SImode. */
25573 if (ptr_mode == SImode)
25584 mem = adjust_address (m_tramp, HImode, offset);
25585 emit_move_insn (mem, gen_int_mode (opcode, HImode));
25587 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
25588 emit_move_insn (mem, chain_value);
25591 /* Jump to r11; the last (unused) byte is a nop, only there to
25592 pad the write out to a single 32-bit store. */
25593 mem = adjust_address (m_tramp, SImode, offset);
25594 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
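/* Editor's note: decoded, the bytes stored above are (little-endian)

       49 bb <imm64>    movabs $fnaddr, %r11    (41 bb <imm32> for the movl form)
       49 ba <imm>      movabs $chain,  %r10
       49 ff e3         rex.W jmpq *%r11
       90               nop (padding)

   which is why 0x90e3ff49 is written out as a single SImode word.  */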
25601 /* Depending on the static chain location, either load a register
25602 with a constant, or push the constant to the stack. All of the
25603 instructions are the same size. */
25604 chain = ix86_static_chain (fndecl, true);
25607 switch (REGNO (chain))
25610 opcode = 0xb8; break;
25612 opcode = 0xb9; break;
25614 gcc_unreachable ();
25620 mem = adjust_address (m_tramp, QImode, offset);
25621 emit_move_insn (mem, gen_int_mode (opcode, QImode));
25623 mem = adjust_address (m_tramp, SImode, offset + 1);
25624 emit_move_insn (mem, chain_value);
25627 mem = adjust_address (m_tramp, QImode, offset);
25628 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
25630 mem = adjust_address (m_tramp, SImode, offset + 1);
25632 /* Compute offset from the end of the jmp to the target function.
25633 In the case in which the trampoline stores the static chain on
25634 the stack, we need to skip the first insn which pushes the
25635 (call-saved) register static chain; this push is 1 byte. */
25637 disp = expand_binop (SImode, sub_optab, fnaddr,
25638 plus_constant (Pmode, XEXP (m_tramp, 0),
25639 offset - (MEM_P (chain) ? 1 : 0)),
25640 NULL_RTX, 1, OPTAB_DIRECT);
25641 emit_move_insn (mem, disp);
25644 gcc_assert (offset <= TRAMPOLINE_SIZE);
25646 #ifdef HAVE_ENABLE_EXECUTE_STACK
25647 #ifdef CHECK_EXECUTE_STACK_ENABLED
25648 if (CHECK_EXECUTE_STACK_ENABLED)
25650 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
25651 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
25655 /* The following file contains several enumerations and data structures
25656 built from the definitions in i386-builtin-types.def. */
25658 #include "i386-builtin-types.inc"
25660 /* Table for the ix86 builtin non-function types. */
25661 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
25663 /* Retrieve an element from the above table, building some of
25664 the types lazily. */
25667 ix86_get_builtin_type (enum ix86_builtin_type tcode)
25669 unsigned int index;
25672 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
25674 type = ix86_builtin_type_tab[(int) tcode];
25678 gcc_assert (tcode > IX86_BT_LAST_PRIM);
25679 if (tcode <= IX86_BT_LAST_VECT)
25681 enum machine_mode mode;
25683 index = tcode - IX86_BT_LAST_PRIM - 1;
25684 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
25685 mode = ix86_builtin_type_vect_mode[index];
25687 type = build_vector_type_for_mode (itype, mode);
25693 index = tcode - IX86_BT_LAST_VECT - 1;
25694 if (tcode <= IX86_BT_LAST_PTR)
25695 quals = TYPE_UNQUALIFIED;
25697 quals = TYPE_QUAL_CONST;
25699 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
25700 if (quals != TYPE_UNQUALIFIED)
25701 itype = build_qualified_type (itype, quals);
25703 type = build_pointer_type (itype);
25706 ix86_builtin_type_tab[(int) tcode] = type;
25710 /* Table for the ix86 builtin function types. */
25711 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
25713 /* Retrieve an element from the above table, building some of
25714 the types lazily. */
25717 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
25721 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
25723 type = ix86_builtin_func_type_tab[(int) tcode];
25727 if (tcode <= IX86_BT_LAST_FUNC)
25729 unsigned start = ix86_builtin_func_start[(int) tcode];
25730 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
25731 tree rtype, atype, args = void_list_node;
25734 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
25735 for (i = after - 1; i > start; --i)
25737 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
25738 args = tree_cons (NULL, atype, args);
25741 type = build_function_type (rtype, args);
25745 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
25746 enum ix86_builtin_func_type icode;
25748 icode = ix86_builtin_func_alias_base[index];
25749 type = ix86_get_builtin_func_type (icode);
25752 ix86_builtin_func_type_tab[(int) tcode] = type;
25757 /* Codes for all the SSE/MMX builtins. */
25760 IX86_BUILTIN_ADDPS,
25761 IX86_BUILTIN_ADDSS,
25762 IX86_BUILTIN_DIVPS,
25763 IX86_BUILTIN_DIVSS,
25764 IX86_BUILTIN_MULPS,
25765 IX86_BUILTIN_MULSS,
25766 IX86_BUILTIN_SUBPS,
25767 IX86_BUILTIN_SUBSS,
25769 IX86_BUILTIN_CMPEQPS,
25770 IX86_BUILTIN_CMPLTPS,
25771 IX86_BUILTIN_CMPLEPS,
25772 IX86_BUILTIN_CMPGTPS,
25773 IX86_BUILTIN_CMPGEPS,
25774 IX86_BUILTIN_CMPNEQPS,
25775 IX86_BUILTIN_CMPNLTPS,
25776 IX86_BUILTIN_CMPNLEPS,
25777 IX86_BUILTIN_CMPNGTPS,
25778 IX86_BUILTIN_CMPNGEPS,
25779 IX86_BUILTIN_CMPORDPS,
25780 IX86_BUILTIN_CMPUNORDPS,
25781 IX86_BUILTIN_CMPEQSS,
25782 IX86_BUILTIN_CMPLTSS,
25783 IX86_BUILTIN_CMPLESS,
25784 IX86_BUILTIN_CMPNEQSS,
25785 IX86_BUILTIN_CMPNLTSS,
25786 IX86_BUILTIN_CMPNLESS,
25787 IX86_BUILTIN_CMPNGTSS,
25788 IX86_BUILTIN_CMPNGESS,
25789 IX86_BUILTIN_CMPORDSS,
25790 IX86_BUILTIN_CMPUNORDSS,
25792 IX86_BUILTIN_COMIEQSS,
25793 IX86_BUILTIN_COMILTSS,
25794 IX86_BUILTIN_COMILESS,
25795 IX86_BUILTIN_COMIGTSS,
25796 IX86_BUILTIN_COMIGESS,
25797 IX86_BUILTIN_COMINEQSS,
25798 IX86_BUILTIN_UCOMIEQSS,
25799 IX86_BUILTIN_UCOMILTSS,
25800 IX86_BUILTIN_UCOMILESS,
25801 IX86_BUILTIN_UCOMIGTSS,
25802 IX86_BUILTIN_UCOMIGESS,
25803 IX86_BUILTIN_UCOMINEQSS,
25805 IX86_BUILTIN_CVTPI2PS,
25806 IX86_BUILTIN_CVTPS2PI,
25807 IX86_BUILTIN_CVTSI2SS,
25808 IX86_BUILTIN_CVTSI642SS,
25809 IX86_BUILTIN_CVTSS2SI,
25810 IX86_BUILTIN_CVTSS2SI64,
25811 IX86_BUILTIN_CVTTPS2PI,
25812 IX86_BUILTIN_CVTTSS2SI,
25813 IX86_BUILTIN_CVTTSS2SI64,
25815 IX86_BUILTIN_MAXPS,
25816 IX86_BUILTIN_MAXSS,
25817 IX86_BUILTIN_MINPS,
25818 IX86_BUILTIN_MINSS,
25820 IX86_BUILTIN_LOADUPS,
25821 IX86_BUILTIN_STOREUPS,
25822 IX86_BUILTIN_MOVSS,
25824 IX86_BUILTIN_MOVHLPS,
25825 IX86_BUILTIN_MOVLHPS,
25826 IX86_BUILTIN_LOADHPS,
25827 IX86_BUILTIN_LOADLPS,
25828 IX86_BUILTIN_STOREHPS,
25829 IX86_BUILTIN_STORELPS,
25831 IX86_BUILTIN_MASKMOVQ,
25832 IX86_BUILTIN_MOVMSKPS,
25833 IX86_BUILTIN_PMOVMSKB,
25835 IX86_BUILTIN_MOVNTPS,
25836 IX86_BUILTIN_MOVNTQ,
25838 IX86_BUILTIN_LOADDQU,
25839 IX86_BUILTIN_STOREDQU,
25841 IX86_BUILTIN_PACKSSWB,
25842 IX86_BUILTIN_PACKSSDW,
25843 IX86_BUILTIN_PACKUSWB,
25845 IX86_BUILTIN_PADDB,
25846 IX86_BUILTIN_PADDW,
25847 IX86_BUILTIN_PADDD,
25848 IX86_BUILTIN_PADDQ,
25849 IX86_BUILTIN_PADDSB,
25850 IX86_BUILTIN_PADDSW,
25851 IX86_BUILTIN_PADDUSB,
25852 IX86_BUILTIN_PADDUSW,
25853 IX86_BUILTIN_PSUBB,
25854 IX86_BUILTIN_PSUBW,
25855 IX86_BUILTIN_PSUBD,
25856 IX86_BUILTIN_PSUBQ,
25857 IX86_BUILTIN_PSUBSB,
25858 IX86_BUILTIN_PSUBSW,
25859 IX86_BUILTIN_PSUBUSB,
25860 IX86_BUILTIN_PSUBUSW,
25863 IX86_BUILTIN_PANDN,
25867 IX86_BUILTIN_PAVGB,
25868 IX86_BUILTIN_PAVGW,
25870 IX86_BUILTIN_PCMPEQB,
25871 IX86_BUILTIN_PCMPEQW,
25872 IX86_BUILTIN_PCMPEQD,
25873 IX86_BUILTIN_PCMPGTB,
25874 IX86_BUILTIN_PCMPGTW,
25875 IX86_BUILTIN_PCMPGTD,
25877 IX86_BUILTIN_PMADDWD,
25879 IX86_BUILTIN_PMAXSW,
25880 IX86_BUILTIN_PMAXUB,
25881 IX86_BUILTIN_PMINSW,
25882 IX86_BUILTIN_PMINUB,
25884 IX86_BUILTIN_PMULHUW,
25885 IX86_BUILTIN_PMULHW,
25886 IX86_BUILTIN_PMULLW,
25888 IX86_BUILTIN_PSADBW,
25889 IX86_BUILTIN_PSHUFW,
25891 IX86_BUILTIN_PSLLW,
25892 IX86_BUILTIN_PSLLD,
25893 IX86_BUILTIN_PSLLQ,
25894 IX86_BUILTIN_PSRAW,
25895 IX86_BUILTIN_PSRAD,
25896 IX86_BUILTIN_PSRLW,
25897 IX86_BUILTIN_PSRLD,
25898 IX86_BUILTIN_PSRLQ,
25899 IX86_BUILTIN_PSLLWI,
25900 IX86_BUILTIN_PSLLDI,
25901 IX86_BUILTIN_PSLLQI,
25902 IX86_BUILTIN_PSRAWI,
25903 IX86_BUILTIN_PSRADI,
25904 IX86_BUILTIN_PSRLWI,
25905 IX86_BUILTIN_PSRLDI,
25906 IX86_BUILTIN_PSRLQI,
25908 IX86_BUILTIN_PUNPCKHBW,
25909 IX86_BUILTIN_PUNPCKHWD,
25910 IX86_BUILTIN_PUNPCKHDQ,
25911 IX86_BUILTIN_PUNPCKLBW,
25912 IX86_BUILTIN_PUNPCKLWD,
25913 IX86_BUILTIN_PUNPCKLDQ,
25915 IX86_BUILTIN_SHUFPS,
25917 IX86_BUILTIN_RCPPS,
25918 IX86_BUILTIN_RCPSS,
25919 IX86_BUILTIN_RSQRTPS,
25920 IX86_BUILTIN_RSQRTPS_NR,
25921 IX86_BUILTIN_RSQRTSS,
25922 IX86_BUILTIN_RSQRTF,
25923 IX86_BUILTIN_SQRTPS,
25924 IX86_BUILTIN_SQRTPS_NR,
25925 IX86_BUILTIN_SQRTSS,
25927 IX86_BUILTIN_UNPCKHPS,
25928 IX86_BUILTIN_UNPCKLPS,
25930 IX86_BUILTIN_ANDPS,
25931 IX86_BUILTIN_ANDNPS,
25933 IX86_BUILTIN_XORPS,
25936 IX86_BUILTIN_LDMXCSR,
25937 IX86_BUILTIN_STMXCSR,
25938 IX86_BUILTIN_SFENCE,
25940 IX86_BUILTIN_FXSAVE,
25941 IX86_BUILTIN_FXRSTOR,
25942 IX86_BUILTIN_FXSAVE64,
25943 IX86_BUILTIN_FXRSTOR64,
25945 IX86_BUILTIN_XSAVE,
25946 IX86_BUILTIN_XRSTOR,
25947 IX86_BUILTIN_XSAVE64,
25948 IX86_BUILTIN_XRSTOR64,
25950 IX86_BUILTIN_XSAVEOPT,
25951 IX86_BUILTIN_XSAVEOPT64,
25953 /* 3DNow! Original */
25954 IX86_BUILTIN_FEMMS,
25955 IX86_BUILTIN_PAVGUSB,
25956 IX86_BUILTIN_PF2ID,
25957 IX86_BUILTIN_PFACC,
25958 IX86_BUILTIN_PFADD,
25959 IX86_BUILTIN_PFCMPEQ,
25960 IX86_BUILTIN_PFCMPGE,
25961 IX86_BUILTIN_PFCMPGT,
25962 IX86_BUILTIN_PFMAX,
25963 IX86_BUILTIN_PFMIN,
25964 IX86_BUILTIN_PFMUL,
25965 IX86_BUILTIN_PFRCP,
25966 IX86_BUILTIN_PFRCPIT1,
25967 IX86_BUILTIN_PFRCPIT2,
25968 IX86_BUILTIN_PFRSQIT1,
25969 IX86_BUILTIN_PFRSQRT,
25970 IX86_BUILTIN_PFSUB,
25971 IX86_BUILTIN_PFSUBR,
25972 IX86_BUILTIN_PI2FD,
25973 IX86_BUILTIN_PMULHRW,
25975 /* 3DNow! Athlon Extensions */
25976 IX86_BUILTIN_PF2IW,
25977 IX86_BUILTIN_PFNACC,
25978 IX86_BUILTIN_PFPNACC,
25979 IX86_BUILTIN_PI2FW,
25980 IX86_BUILTIN_PSWAPDSI,
25981 IX86_BUILTIN_PSWAPDSF,
25984 IX86_BUILTIN_ADDPD,
25985 IX86_BUILTIN_ADDSD,
25986 IX86_BUILTIN_DIVPD,
25987 IX86_BUILTIN_DIVSD,
25988 IX86_BUILTIN_MULPD,
25989 IX86_BUILTIN_MULSD,
25990 IX86_BUILTIN_SUBPD,
25991 IX86_BUILTIN_SUBSD,
25993 IX86_BUILTIN_CMPEQPD,
25994 IX86_BUILTIN_CMPLTPD,
25995 IX86_BUILTIN_CMPLEPD,
25996 IX86_BUILTIN_CMPGTPD,
25997 IX86_BUILTIN_CMPGEPD,
25998 IX86_BUILTIN_CMPNEQPD,
25999 IX86_BUILTIN_CMPNLTPD,
26000 IX86_BUILTIN_CMPNLEPD,
26001 IX86_BUILTIN_CMPNGTPD,
26002 IX86_BUILTIN_CMPNGEPD,
26003 IX86_BUILTIN_CMPORDPD,
26004 IX86_BUILTIN_CMPUNORDPD,
26005 IX86_BUILTIN_CMPEQSD,
26006 IX86_BUILTIN_CMPLTSD,
26007 IX86_BUILTIN_CMPLESD,
26008 IX86_BUILTIN_CMPNEQSD,
26009 IX86_BUILTIN_CMPNLTSD,
26010 IX86_BUILTIN_CMPNLESD,
26011 IX86_BUILTIN_CMPORDSD,
26012 IX86_BUILTIN_CMPUNORDSD,
26014 IX86_BUILTIN_COMIEQSD,
26015 IX86_BUILTIN_COMILTSD,
26016 IX86_BUILTIN_COMILESD,
26017 IX86_BUILTIN_COMIGTSD,
26018 IX86_BUILTIN_COMIGESD,
26019 IX86_BUILTIN_COMINEQSD,
26020 IX86_BUILTIN_UCOMIEQSD,
26021 IX86_BUILTIN_UCOMILTSD,
26022 IX86_BUILTIN_UCOMILESD,
26023 IX86_BUILTIN_UCOMIGTSD,
26024 IX86_BUILTIN_UCOMIGESD,
26025 IX86_BUILTIN_UCOMINEQSD,
26027 IX86_BUILTIN_MAXPD,
26028 IX86_BUILTIN_MAXSD,
26029 IX86_BUILTIN_MINPD,
26030 IX86_BUILTIN_MINSD,
26032 IX86_BUILTIN_ANDPD,
26033 IX86_BUILTIN_ANDNPD,
26035 IX86_BUILTIN_XORPD,
26037 IX86_BUILTIN_SQRTPD,
26038 IX86_BUILTIN_SQRTSD,
26040 IX86_BUILTIN_UNPCKHPD,
26041 IX86_BUILTIN_UNPCKLPD,
26043 IX86_BUILTIN_SHUFPD,
26045 IX86_BUILTIN_LOADUPD,
26046 IX86_BUILTIN_STOREUPD,
26047 IX86_BUILTIN_MOVSD,
26049 IX86_BUILTIN_LOADHPD,
26050 IX86_BUILTIN_LOADLPD,
26052 IX86_BUILTIN_CVTDQ2PD,
26053 IX86_BUILTIN_CVTDQ2PS,
26055 IX86_BUILTIN_CVTPD2DQ,
26056 IX86_BUILTIN_CVTPD2PI,
26057 IX86_BUILTIN_CVTPD2PS,
26058 IX86_BUILTIN_CVTTPD2DQ,
26059 IX86_BUILTIN_CVTTPD2PI,
26061 IX86_BUILTIN_CVTPI2PD,
26062 IX86_BUILTIN_CVTSI2SD,
26063 IX86_BUILTIN_CVTSI642SD,
26065 IX86_BUILTIN_CVTSD2SI,
26066 IX86_BUILTIN_CVTSD2SI64,
26067 IX86_BUILTIN_CVTSD2SS,
26068 IX86_BUILTIN_CVTSS2SD,
26069 IX86_BUILTIN_CVTTSD2SI,
26070 IX86_BUILTIN_CVTTSD2SI64,
26072 IX86_BUILTIN_CVTPS2DQ,
26073 IX86_BUILTIN_CVTPS2PD,
26074 IX86_BUILTIN_CVTTPS2DQ,
26076 IX86_BUILTIN_MOVNTI,
26077 IX86_BUILTIN_MOVNTI64,
26078 IX86_BUILTIN_MOVNTPD,
26079 IX86_BUILTIN_MOVNTDQ,
26081 IX86_BUILTIN_MOVQ128,
26084 IX86_BUILTIN_MASKMOVDQU,
26085 IX86_BUILTIN_MOVMSKPD,
26086 IX86_BUILTIN_PMOVMSKB128,
26088 IX86_BUILTIN_PACKSSWB128,
26089 IX86_BUILTIN_PACKSSDW128,
26090 IX86_BUILTIN_PACKUSWB128,
26092 IX86_BUILTIN_PADDB128,
26093 IX86_BUILTIN_PADDW128,
26094 IX86_BUILTIN_PADDD128,
26095 IX86_BUILTIN_PADDQ128,
26096 IX86_BUILTIN_PADDSB128,
26097 IX86_BUILTIN_PADDSW128,
26098 IX86_BUILTIN_PADDUSB128,
26099 IX86_BUILTIN_PADDUSW128,
26100 IX86_BUILTIN_PSUBB128,
26101 IX86_BUILTIN_PSUBW128,
26102 IX86_BUILTIN_PSUBD128,
26103 IX86_BUILTIN_PSUBQ128,
26104 IX86_BUILTIN_PSUBSB128,
26105 IX86_BUILTIN_PSUBSW128,
26106 IX86_BUILTIN_PSUBUSB128,
26107 IX86_BUILTIN_PSUBUSW128,
26109 IX86_BUILTIN_PAND128,
26110 IX86_BUILTIN_PANDN128,
26111 IX86_BUILTIN_POR128,
26112 IX86_BUILTIN_PXOR128,
26114 IX86_BUILTIN_PAVGB128,
26115 IX86_BUILTIN_PAVGW128,
26117 IX86_BUILTIN_PCMPEQB128,
26118 IX86_BUILTIN_PCMPEQW128,
26119 IX86_BUILTIN_PCMPEQD128,
26120 IX86_BUILTIN_PCMPGTB128,
26121 IX86_BUILTIN_PCMPGTW128,
26122 IX86_BUILTIN_PCMPGTD128,
26124 IX86_BUILTIN_PMADDWD128,
26126 IX86_BUILTIN_PMAXSW128,
26127 IX86_BUILTIN_PMAXUB128,
26128 IX86_BUILTIN_PMINSW128,
26129 IX86_BUILTIN_PMINUB128,
26131 IX86_BUILTIN_PMULUDQ,
26132 IX86_BUILTIN_PMULUDQ128,
26133 IX86_BUILTIN_PMULHUW128,
26134 IX86_BUILTIN_PMULHW128,
26135 IX86_BUILTIN_PMULLW128,
26137 IX86_BUILTIN_PSADBW128,
26138 IX86_BUILTIN_PSHUFHW,
26139 IX86_BUILTIN_PSHUFLW,
26140 IX86_BUILTIN_PSHUFD,
26142 IX86_BUILTIN_PSLLDQI128,
26143 IX86_BUILTIN_PSLLWI128,
26144 IX86_BUILTIN_PSLLDI128,
26145 IX86_BUILTIN_PSLLQI128,
26146 IX86_BUILTIN_PSRAWI128,
26147 IX86_BUILTIN_PSRADI128,
26148 IX86_BUILTIN_PSRLDQI128,
26149 IX86_BUILTIN_PSRLWI128,
26150 IX86_BUILTIN_PSRLDI128,
26151 IX86_BUILTIN_PSRLQI128,
26153 IX86_BUILTIN_PSLLDQ128,
26154 IX86_BUILTIN_PSLLW128,
26155 IX86_BUILTIN_PSLLD128,
26156 IX86_BUILTIN_PSLLQ128,
26157 IX86_BUILTIN_PSRAW128,
26158 IX86_BUILTIN_PSRAD128,
26159 IX86_BUILTIN_PSRLW128,
26160 IX86_BUILTIN_PSRLD128,
26161 IX86_BUILTIN_PSRLQ128,
26163 IX86_BUILTIN_PUNPCKHBW128,
26164 IX86_BUILTIN_PUNPCKHWD128,
26165 IX86_BUILTIN_PUNPCKHDQ128,
26166 IX86_BUILTIN_PUNPCKHQDQ128,
26167 IX86_BUILTIN_PUNPCKLBW128,
26168 IX86_BUILTIN_PUNPCKLWD128,
26169 IX86_BUILTIN_PUNPCKLDQ128,
26170 IX86_BUILTIN_PUNPCKLQDQ128,
26172 IX86_BUILTIN_CLFLUSH,
26173 IX86_BUILTIN_MFENCE,
26174 IX86_BUILTIN_LFENCE,
26175 IX86_BUILTIN_PAUSE,
26177 IX86_BUILTIN_BSRSI,
26178 IX86_BUILTIN_BSRDI,
26179 IX86_BUILTIN_RDPMC,
26180 IX86_BUILTIN_RDTSC,
26181 IX86_BUILTIN_RDTSCP,
26182 IX86_BUILTIN_ROLQI,
26183 IX86_BUILTIN_ROLHI,
26184 IX86_BUILTIN_RORQI,
26185 IX86_BUILTIN_RORHI,
26187 /* SSE3. */
26188 IX86_BUILTIN_ADDSUBPS,
26189 IX86_BUILTIN_HADDPS,
26190 IX86_BUILTIN_HSUBPS,
26191 IX86_BUILTIN_MOVSHDUP,
26192 IX86_BUILTIN_MOVSLDUP,
26193 IX86_BUILTIN_ADDSUBPD,
26194 IX86_BUILTIN_HADDPD,
26195 IX86_BUILTIN_HSUBPD,
26196 IX86_BUILTIN_LDDQU,
26198 IX86_BUILTIN_MONITOR,
26199 IX86_BUILTIN_MWAIT,
26201 /* SSSE3. */
26202 IX86_BUILTIN_PHADDW,
26203 IX86_BUILTIN_PHADDD,
26204 IX86_BUILTIN_PHADDSW,
26205 IX86_BUILTIN_PHSUBW,
26206 IX86_BUILTIN_PHSUBD,
26207 IX86_BUILTIN_PHSUBSW,
26208 IX86_BUILTIN_PMADDUBSW,
26209 IX86_BUILTIN_PMULHRSW,
26210 IX86_BUILTIN_PSHUFB,
26211 IX86_BUILTIN_PSIGNB,
26212 IX86_BUILTIN_PSIGNW,
26213 IX86_BUILTIN_PSIGND,
26214 IX86_BUILTIN_PALIGNR,
26215 IX86_BUILTIN_PABSB,
26216 IX86_BUILTIN_PABSW,
26217 IX86_BUILTIN_PABSD,
26219 IX86_BUILTIN_PHADDW128,
26220 IX86_BUILTIN_PHADDD128,
26221 IX86_BUILTIN_PHADDSW128,
26222 IX86_BUILTIN_PHSUBW128,
26223 IX86_BUILTIN_PHSUBD128,
26224 IX86_BUILTIN_PHSUBSW128,
26225 IX86_BUILTIN_PMADDUBSW128,
26226 IX86_BUILTIN_PMULHRSW128,
26227 IX86_BUILTIN_PSHUFB128,
26228 IX86_BUILTIN_PSIGNB128,
26229 IX86_BUILTIN_PSIGNW128,
26230 IX86_BUILTIN_PSIGND128,
26231 IX86_BUILTIN_PALIGNR128,
26232 IX86_BUILTIN_PABSB128,
26233 IX86_BUILTIN_PABSW128,
26234 IX86_BUILTIN_PABSD128,
26236 /* AMDFAM10 - SSE4A New Instructions. */
26237 IX86_BUILTIN_MOVNTSD,
26238 IX86_BUILTIN_MOVNTSS,
26239 IX86_BUILTIN_EXTRQI,
26240 IX86_BUILTIN_EXTRQ,
26241 IX86_BUILTIN_INSERTQI,
26242 IX86_BUILTIN_INSERTQ,
26244 /* SSE4.1. */
26245 IX86_BUILTIN_BLENDPD,
26246 IX86_BUILTIN_BLENDPS,
26247 IX86_BUILTIN_BLENDVPD,
26248 IX86_BUILTIN_BLENDVPS,
26249 IX86_BUILTIN_PBLENDVB128,
26250 IX86_BUILTIN_PBLENDW128,
26252 IX86_BUILTIN_DPPD,
26253 IX86_BUILTIN_DPPS,
26255 IX86_BUILTIN_INSERTPS128,
26257 IX86_BUILTIN_MOVNTDQA,
26258 IX86_BUILTIN_MPSADBW128,
26259 IX86_BUILTIN_PACKUSDW128,
26260 IX86_BUILTIN_PCMPEQQ,
26261 IX86_BUILTIN_PHMINPOSUW128,
26263 IX86_BUILTIN_PMAXSB128,
26264 IX86_BUILTIN_PMAXSD128,
26265 IX86_BUILTIN_PMAXUD128,
26266 IX86_BUILTIN_PMAXUW128,
26268 IX86_BUILTIN_PMINSB128,
26269 IX86_BUILTIN_PMINSD128,
26270 IX86_BUILTIN_PMINUD128,
26271 IX86_BUILTIN_PMINUW128,
26273 IX86_BUILTIN_PMOVSXBW128,
26274 IX86_BUILTIN_PMOVSXBD128,
26275 IX86_BUILTIN_PMOVSXBQ128,
26276 IX86_BUILTIN_PMOVSXWD128,
26277 IX86_BUILTIN_PMOVSXWQ128,
26278 IX86_BUILTIN_PMOVSXDQ128,
26280 IX86_BUILTIN_PMOVZXBW128,
26281 IX86_BUILTIN_PMOVZXBD128,
26282 IX86_BUILTIN_PMOVZXBQ128,
26283 IX86_BUILTIN_PMOVZXWD128,
26284 IX86_BUILTIN_PMOVZXWQ128,
26285 IX86_BUILTIN_PMOVZXDQ128,
26287 IX86_BUILTIN_PMULDQ128,
26288 IX86_BUILTIN_PMULLD128,
26290 IX86_BUILTIN_ROUNDSD,
26291 IX86_BUILTIN_ROUNDSS,
26293 IX86_BUILTIN_ROUNDPD,
26294 IX86_BUILTIN_ROUNDPS,
26296 IX86_BUILTIN_FLOORPD,
26297 IX86_BUILTIN_CEILPD,
26298 IX86_BUILTIN_TRUNCPD,
26299 IX86_BUILTIN_RINTPD,
26300 IX86_BUILTIN_ROUNDPD_AZ,
26302 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
26303 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
26304 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
26306 IX86_BUILTIN_FLOORPS,
26307 IX86_BUILTIN_CEILPS,
26308 IX86_BUILTIN_TRUNCPS,
26309 IX86_BUILTIN_RINTPS,
26310 IX86_BUILTIN_ROUNDPS_AZ,
26312 IX86_BUILTIN_FLOORPS_SFIX,
26313 IX86_BUILTIN_CEILPS_SFIX,
26314 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
26316 IX86_BUILTIN_PTESTZ,
26317 IX86_BUILTIN_PTESTC,
26318 IX86_BUILTIN_PTESTNZC,
26320 IX86_BUILTIN_VEC_INIT_V2SI,
26321 IX86_BUILTIN_VEC_INIT_V4HI,
26322 IX86_BUILTIN_VEC_INIT_V8QI,
26323 IX86_BUILTIN_VEC_EXT_V2DF,
26324 IX86_BUILTIN_VEC_EXT_V2DI,
26325 IX86_BUILTIN_VEC_EXT_V4SF,
26326 IX86_BUILTIN_VEC_EXT_V4SI,
26327 IX86_BUILTIN_VEC_EXT_V8HI,
26328 IX86_BUILTIN_VEC_EXT_V2SI,
26329 IX86_BUILTIN_VEC_EXT_V4HI,
26330 IX86_BUILTIN_VEC_EXT_V16QI,
26331 IX86_BUILTIN_VEC_SET_V2DI,
26332 IX86_BUILTIN_VEC_SET_V4SF,
26333 IX86_BUILTIN_VEC_SET_V4SI,
26334 IX86_BUILTIN_VEC_SET_V8HI,
26335 IX86_BUILTIN_VEC_SET_V4HI,
26336 IX86_BUILTIN_VEC_SET_V16QI,
26338 IX86_BUILTIN_VEC_PACK_SFIX,
26339 IX86_BUILTIN_VEC_PACK_SFIX256,
26341 /* SSE4.2. */
26342 IX86_BUILTIN_CRC32QI,
26343 IX86_BUILTIN_CRC32HI,
26344 IX86_BUILTIN_CRC32SI,
26345 IX86_BUILTIN_CRC32DI,
26347 IX86_BUILTIN_PCMPESTRI128,
26348 IX86_BUILTIN_PCMPESTRM128,
26349 IX86_BUILTIN_PCMPESTRA128,
26350 IX86_BUILTIN_PCMPESTRC128,
26351 IX86_BUILTIN_PCMPESTRO128,
26352 IX86_BUILTIN_PCMPESTRS128,
26353 IX86_BUILTIN_PCMPESTRZ128,
26354 IX86_BUILTIN_PCMPISTRI128,
26355 IX86_BUILTIN_PCMPISTRM128,
26356 IX86_BUILTIN_PCMPISTRA128,
26357 IX86_BUILTIN_PCMPISTRC128,
26358 IX86_BUILTIN_PCMPISTRO128,
26359 IX86_BUILTIN_PCMPISTRS128,
26360 IX86_BUILTIN_PCMPISTRZ128,
26362 IX86_BUILTIN_PCMPGTQ,
26364 /* AES instructions */
26365 IX86_BUILTIN_AESENC128,
26366 IX86_BUILTIN_AESENCLAST128,
26367 IX86_BUILTIN_AESDEC128,
26368 IX86_BUILTIN_AESDECLAST128,
26369 IX86_BUILTIN_AESIMC128,
26370 IX86_BUILTIN_AESKEYGENASSIST128,
26372 /* PCLMUL instruction */
26373 IX86_BUILTIN_PCLMULQDQ128,
26375 /* AVX */
26376 IX86_BUILTIN_ADDPD256,
26377 IX86_BUILTIN_ADDPS256,
26378 IX86_BUILTIN_ADDSUBPD256,
26379 IX86_BUILTIN_ADDSUBPS256,
26380 IX86_BUILTIN_ANDPD256,
26381 IX86_BUILTIN_ANDPS256,
26382 IX86_BUILTIN_ANDNPD256,
26383 IX86_BUILTIN_ANDNPS256,
26384 IX86_BUILTIN_BLENDPD256,
26385 IX86_BUILTIN_BLENDPS256,
26386 IX86_BUILTIN_BLENDVPD256,
26387 IX86_BUILTIN_BLENDVPS256,
26388 IX86_BUILTIN_DIVPD256,
26389 IX86_BUILTIN_DIVPS256,
26390 IX86_BUILTIN_DPPS256,
26391 IX86_BUILTIN_HADDPD256,
26392 IX86_BUILTIN_HADDPS256,
26393 IX86_BUILTIN_HSUBPD256,
26394 IX86_BUILTIN_HSUBPS256,
26395 IX86_BUILTIN_MAXPD256,
26396 IX86_BUILTIN_MAXPS256,
26397 IX86_BUILTIN_MINPD256,
26398 IX86_BUILTIN_MINPS256,
26399 IX86_BUILTIN_MULPD256,
26400 IX86_BUILTIN_MULPS256,
26401 IX86_BUILTIN_ORPD256,
26402 IX86_BUILTIN_ORPS256,
26403 IX86_BUILTIN_SHUFPD256,
26404 IX86_BUILTIN_SHUFPS256,
26405 IX86_BUILTIN_SUBPD256,
26406 IX86_BUILTIN_SUBPS256,
26407 IX86_BUILTIN_XORPD256,
26408 IX86_BUILTIN_XORPS256,
26409 IX86_BUILTIN_CMPSD,
26410 IX86_BUILTIN_CMPSS,
26411 IX86_BUILTIN_CMPPD,
26412 IX86_BUILTIN_CMPPS,
26413 IX86_BUILTIN_CMPPD256,
26414 IX86_BUILTIN_CMPPS256,
26415 IX86_BUILTIN_CVTDQ2PD256,
26416 IX86_BUILTIN_CVTDQ2PS256,
26417 IX86_BUILTIN_CVTPD2PS256,
26418 IX86_BUILTIN_CVTPS2DQ256,
26419 IX86_BUILTIN_CVTPS2PD256,
26420 IX86_BUILTIN_CVTTPD2DQ256,
26421 IX86_BUILTIN_CVTPD2DQ256,
26422 IX86_BUILTIN_CVTTPS2DQ256,
26423 IX86_BUILTIN_EXTRACTF128PD256,
26424 IX86_BUILTIN_EXTRACTF128PS256,
26425 IX86_BUILTIN_EXTRACTF128SI256,
26426 IX86_BUILTIN_VZEROALL,
26427 IX86_BUILTIN_VZEROUPPER,
26428 IX86_BUILTIN_VPERMILVARPD,
26429 IX86_BUILTIN_VPERMILVARPS,
26430 IX86_BUILTIN_VPERMILVARPD256,
26431 IX86_BUILTIN_VPERMILVARPS256,
26432 IX86_BUILTIN_VPERMILPD,
26433 IX86_BUILTIN_VPERMILPS,
26434 IX86_BUILTIN_VPERMILPD256,
26435 IX86_BUILTIN_VPERMILPS256,
26436 IX86_BUILTIN_VPERMIL2PD,
26437 IX86_BUILTIN_VPERMIL2PS,
26438 IX86_BUILTIN_VPERMIL2PD256,
26439 IX86_BUILTIN_VPERMIL2PS256,
26440 IX86_BUILTIN_VPERM2F128PD256,
26441 IX86_BUILTIN_VPERM2F128PS256,
26442 IX86_BUILTIN_VPERM2F128SI256,
26443 IX86_BUILTIN_VBROADCASTSS,
26444 IX86_BUILTIN_VBROADCASTSD256,
26445 IX86_BUILTIN_VBROADCASTSS256,
26446 IX86_BUILTIN_VBROADCASTPD256,
26447 IX86_BUILTIN_VBROADCASTPS256,
26448 IX86_BUILTIN_VINSERTF128PD256,
26449 IX86_BUILTIN_VINSERTF128PS256,
26450 IX86_BUILTIN_VINSERTF128SI256,
26451 IX86_BUILTIN_LOADUPD256,
26452 IX86_BUILTIN_LOADUPS256,
26453 IX86_BUILTIN_STOREUPD256,
26454 IX86_BUILTIN_STOREUPS256,
26455 IX86_BUILTIN_LDDQU256,
26456 IX86_BUILTIN_MOVNTDQ256,
26457 IX86_BUILTIN_MOVNTPD256,
26458 IX86_BUILTIN_MOVNTPS256,
26459 IX86_BUILTIN_LOADDQU256,
26460 IX86_BUILTIN_STOREDQU256,
26461 IX86_BUILTIN_MASKLOADPD,
26462 IX86_BUILTIN_MASKLOADPS,
26463 IX86_BUILTIN_MASKSTOREPD,
26464 IX86_BUILTIN_MASKSTOREPS,
26465 IX86_BUILTIN_MASKLOADPD256,
26466 IX86_BUILTIN_MASKLOADPS256,
26467 IX86_BUILTIN_MASKSTOREPD256,
26468 IX86_BUILTIN_MASKSTOREPS256,
26469 IX86_BUILTIN_MOVSHDUP256,
26470 IX86_BUILTIN_MOVSLDUP256,
26471 IX86_BUILTIN_MOVDDUP256,
26473 IX86_BUILTIN_SQRTPD256,
26474 IX86_BUILTIN_SQRTPS256,
26475 IX86_BUILTIN_SQRTPS_NR256,
26476 IX86_BUILTIN_RSQRTPS256,
26477 IX86_BUILTIN_RSQRTPS_NR256,
26479 IX86_BUILTIN_RCPPS256,
26481 IX86_BUILTIN_ROUNDPD256,
26482 IX86_BUILTIN_ROUNDPS256,
26484 IX86_BUILTIN_FLOORPD256,
26485 IX86_BUILTIN_CEILPD256,
26486 IX86_BUILTIN_TRUNCPD256,
26487 IX86_BUILTIN_RINTPD256,
26488 IX86_BUILTIN_ROUNDPD_AZ256,
26490 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
26491 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
26492 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
26494 IX86_BUILTIN_FLOORPS256,
26495 IX86_BUILTIN_CEILPS256,
26496 IX86_BUILTIN_TRUNCPS256,
26497 IX86_BUILTIN_RINTPS256,
26498 IX86_BUILTIN_ROUNDPS_AZ256,
26500 IX86_BUILTIN_FLOORPS_SFIX256,
26501 IX86_BUILTIN_CEILPS_SFIX256,
26502 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
26504 IX86_BUILTIN_UNPCKHPD256,
26505 IX86_BUILTIN_UNPCKLPD256,
26506 IX86_BUILTIN_UNPCKHPS256,
26507 IX86_BUILTIN_UNPCKLPS256,
26509 IX86_BUILTIN_SI256_SI,
26510 IX86_BUILTIN_PS256_PS,
26511 IX86_BUILTIN_PD256_PD,
26512 IX86_BUILTIN_SI_SI256,
26513 IX86_BUILTIN_PS_PS256,
26514 IX86_BUILTIN_PD_PD256,
26516 IX86_BUILTIN_VTESTZPD,
26517 IX86_BUILTIN_VTESTCPD,
26518 IX86_BUILTIN_VTESTNZCPD,
26519 IX86_BUILTIN_VTESTZPS,
26520 IX86_BUILTIN_VTESTCPS,
26521 IX86_BUILTIN_VTESTNZCPS,
26522 IX86_BUILTIN_VTESTZPD256,
26523 IX86_BUILTIN_VTESTCPD256,
26524 IX86_BUILTIN_VTESTNZCPD256,
26525 IX86_BUILTIN_VTESTZPS256,
26526 IX86_BUILTIN_VTESTCPS256,
26527 IX86_BUILTIN_VTESTNZCPS256,
26528 IX86_BUILTIN_PTESTZ256,
26529 IX86_BUILTIN_PTESTC256,
26530 IX86_BUILTIN_PTESTNZC256,
26532 IX86_BUILTIN_MOVMSKPD256,
26533 IX86_BUILTIN_MOVMSKPS256,
26535 /* AVX2 */
26536 IX86_BUILTIN_MPSADBW256,
26537 IX86_BUILTIN_PABSB256,
26538 IX86_BUILTIN_PABSW256,
26539 IX86_BUILTIN_PABSD256,
26540 IX86_BUILTIN_PACKSSDW256,
26541 IX86_BUILTIN_PACKSSWB256,
26542 IX86_BUILTIN_PACKUSDW256,
26543 IX86_BUILTIN_PACKUSWB256,
26544 IX86_BUILTIN_PADDB256,
26545 IX86_BUILTIN_PADDW256,
26546 IX86_BUILTIN_PADDD256,
26547 IX86_BUILTIN_PADDQ256,
26548 IX86_BUILTIN_PADDSB256,
26549 IX86_BUILTIN_PADDSW256,
26550 IX86_BUILTIN_PADDUSB256,
26551 IX86_BUILTIN_PADDUSW256,
26552 IX86_BUILTIN_PALIGNR256,
26553 IX86_BUILTIN_AND256I,
26554 IX86_BUILTIN_ANDNOT256I,
26555 IX86_BUILTIN_PAVGB256,
26556 IX86_BUILTIN_PAVGW256,
26557 IX86_BUILTIN_PBLENDVB256,
26558 IX86_BUILTIN_PBLENDVW256,
26559 IX86_BUILTIN_PCMPEQB256,
26560 IX86_BUILTIN_PCMPEQW256,
26561 IX86_BUILTIN_PCMPEQD256,
26562 IX86_BUILTIN_PCMPEQQ256,
26563 IX86_BUILTIN_PCMPGTB256,
26564 IX86_BUILTIN_PCMPGTW256,
26565 IX86_BUILTIN_PCMPGTD256,
26566 IX86_BUILTIN_PCMPGTQ256,
26567 IX86_BUILTIN_PHADDW256,
26568 IX86_BUILTIN_PHADDD256,
26569 IX86_BUILTIN_PHADDSW256,
26570 IX86_BUILTIN_PHSUBW256,
26571 IX86_BUILTIN_PHSUBD256,
26572 IX86_BUILTIN_PHSUBSW256,
26573 IX86_BUILTIN_PMADDUBSW256,
26574 IX86_BUILTIN_PMADDWD256,
26575 IX86_BUILTIN_PMAXSB256,
26576 IX86_BUILTIN_PMAXSW256,
26577 IX86_BUILTIN_PMAXSD256,
26578 IX86_BUILTIN_PMAXUB256,
26579 IX86_BUILTIN_PMAXUW256,
26580 IX86_BUILTIN_PMAXUD256,
26581 IX86_BUILTIN_PMINSB256,
26582 IX86_BUILTIN_PMINSW256,
26583 IX86_BUILTIN_PMINSD256,
26584 IX86_BUILTIN_PMINUB256,
26585 IX86_BUILTIN_PMINUW256,
26586 IX86_BUILTIN_PMINUD256,
26587 IX86_BUILTIN_PMOVMSKB256,
26588 IX86_BUILTIN_PMOVSXBW256,
26589 IX86_BUILTIN_PMOVSXBD256,
26590 IX86_BUILTIN_PMOVSXBQ256,
26591 IX86_BUILTIN_PMOVSXWD256,
26592 IX86_BUILTIN_PMOVSXWQ256,
26593 IX86_BUILTIN_PMOVSXDQ256,
26594 IX86_BUILTIN_PMOVZXBW256,
26595 IX86_BUILTIN_PMOVZXBD256,
26596 IX86_BUILTIN_PMOVZXBQ256,
26597 IX86_BUILTIN_PMOVZXWD256,
26598 IX86_BUILTIN_PMOVZXWQ256,
26599 IX86_BUILTIN_PMOVZXDQ256,
26600 IX86_BUILTIN_PMULDQ256,
26601 IX86_BUILTIN_PMULHRSW256,
26602 IX86_BUILTIN_PMULHUW256,
26603 IX86_BUILTIN_PMULHW256,
26604 IX86_BUILTIN_PMULLW256,
26605 IX86_BUILTIN_PMULLD256,
26606 IX86_BUILTIN_PMULUDQ256,
26607 IX86_BUILTIN_POR256,
26608 IX86_BUILTIN_PSADBW256,
26609 IX86_BUILTIN_PSHUFB256,
26610 IX86_BUILTIN_PSHUFD256,
26611 IX86_BUILTIN_PSHUFHW256,
26612 IX86_BUILTIN_PSHUFLW256,
26613 IX86_BUILTIN_PSIGNB256,
26614 IX86_BUILTIN_PSIGNW256,
26615 IX86_BUILTIN_PSIGND256,
26616 IX86_BUILTIN_PSLLDQI256,
26617 IX86_BUILTIN_PSLLWI256,
26618 IX86_BUILTIN_PSLLW256,
26619 IX86_BUILTIN_PSLLDI256,
26620 IX86_BUILTIN_PSLLD256,
26621 IX86_BUILTIN_PSLLQI256,
26622 IX86_BUILTIN_PSLLQ256,
26623 IX86_BUILTIN_PSRAWI256,
26624 IX86_BUILTIN_PSRAW256,
26625 IX86_BUILTIN_PSRADI256,
26626 IX86_BUILTIN_PSRAD256,
26627 IX86_BUILTIN_PSRLDQI256,
26628 IX86_BUILTIN_PSRLWI256,
26629 IX86_BUILTIN_PSRLW256,
26630 IX86_BUILTIN_PSRLDI256,
26631 IX86_BUILTIN_PSRLD256,
26632 IX86_BUILTIN_PSRLQI256,
26633 IX86_BUILTIN_PSRLQ256,
26634 IX86_BUILTIN_PSUBB256,
26635 IX86_BUILTIN_PSUBW256,
26636 IX86_BUILTIN_PSUBD256,
26637 IX86_BUILTIN_PSUBQ256,
26638 IX86_BUILTIN_PSUBSB256,
26639 IX86_BUILTIN_PSUBSW256,
26640 IX86_BUILTIN_PSUBUSB256,
26641 IX86_BUILTIN_PSUBUSW256,
26642 IX86_BUILTIN_PUNPCKHBW256,
26643 IX86_BUILTIN_PUNPCKHWD256,
26644 IX86_BUILTIN_PUNPCKHDQ256,
26645 IX86_BUILTIN_PUNPCKHQDQ256,
26646 IX86_BUILTIN_PUNPCKLBW256,
26647 IX86_BUILTIN_PUNPCKLWD256,
26648 IX86_BUILTIN_PUNPCKLDQ256,
26649 IX86_BUILTIN_PUNPCKLQDQ256,
26650 IX86_BUILTIN_PXOR256,
26651 IX86_BUILTIN_MOVNTDQA256,
26652 IX86_BUILTIN_VBROADCASTSS_PS,
26653 IX86_BUILTIN_VBROADCASTSS_PS256,
26654 IX86_BUILTIN_VBROADCASTSD_PD256,
26655 IX86_BUILTIN_VBROADCASTSI256,
26656 IX86_BUILTIN_PBLENDD256,
26657 IX86_BUILTIN_PBLENDD128,
26658 IX86_BUILTIN_PBROADCASTB256,
26659 IX86_BUILTIN_PBROADCASTW256,
26660 IX86_BUILTIN_PBROADCASTD256,
26661 IX86_BUILTIN_PBROADCASTQ256,
26662 IX86_BUILTIN_PBROADCASTB128,
26663 IX86_BUILTIN_PBROADCASTW128,
26664 IX86_BUILTIN_PBROADCASTD128,
26665 IX86_BUILTIN_PBROADCASTQ128,
26666 IX86_BUILTIN_VPERMVARSI256,
26667 IX86_BUILTIN_VPERMDF256,
26668 IX86_BUILTIN_VPERMVARSF256,
26669 IX86_BUILTIN_VPERMDI256,
26670 IX86_BUILTIN_VPERMTI256,
26671 IX86_BUILTIN_VEXTRACT128I256,
26672 IX86_BUILTIN_VINSERT128I256,
26673 IX86_BUILTIN_MASKLOADD,
26674 IX86_BUILTIN_MASKLOADQ,
26675 IX86_BUILTIN_MASKLOADD256,
26676 IX86_BUILTIN_MASKLOADQ256,
26677 IX86_BUILTIN_MASKSTORED,
26678 IX86_BUILTIN_MASKSTOREQ,
26679 IX86_BUILTIN_MASKSTORED256,
26680 IX86_BUILTIN_MASKSTOREQ256,
26681 IX86_BUILTIN_PSLLVV4DI,
26682 IX86_BUILTIN_PSLLVV2DI,
26683 IX86_BUILTIN_PSLLVV8SI,
26684 IX86_BUILTIN_PSLLVV4SI,
26685 IX86_BUILTIN_PSRAVV8SI,
26686 IX86_BUILTIN_PSRAVV4SI,
26687 IX86_BUILTIN_PSRLVV4DI,
26688 IX86_BUILTIN_PSRLVV2DI,
26689 IX86_BUILTIN_PSRLVV8SI,
26690 IX86_BUILTIN_PSRLVV4SI,
26692 IX86_BUILTIN_GATHERSIV2DF,
26693 IX86_BUILTIN_GATHERSIV4DF,
26694 IX86_BUILTIN_GATHERDIV2DF,
26695 IX86_BUILTIN_GATHERDIV4DF,
26696 IX86_BUILTIN_GATHERSIV4SF,
26697 IX86_BUILTIN_GATHERSIV8SF,
26698 IX86_BUILTIN_GATHERDIV4SF,
26699 IX86_BUILTIN_GATHERDIV8SF,
26700 IX86_BUILTIN_GATHERSIV2DI,
26701 IX86_BUILTIN_GATHERSIV4DI,
26702 IX86_BUILTIN_GATHERDIV2DI,
26703 IX86_BUILTIN_GATHERDIV4DI,
26704 IX86_BUILTIN_GATHERSIV4SI,
26705 IX86_BUILTIN_GATHERSIV8SI,
26706 IX86_BUILTIN_GATHERDIV4SI,
26707 IX86_BUILTIN_GATHERDIV8SI,
26709 /* Alternate 4 element gather for the vectorizer where
26710 all operands are 32-byte wide. */
26711 IX86_BUILTIN_GATHERALTSIV4DF,
26712 IX86_BUILTIN_GATHERALTDIV8SF,
26713 IX86_BUILTIN_GATHERALTSIV4DI,
26714 IX86_BUILTIN_GATHERALTDIV8SI,
26716 /* TFmode support builtins. */
26717 IX86_BUILTIN_INFQ,
26718 IX86_BUILTIN_HUGE_VALQ,
26719 IX86_BUILTIN_FABSQ,
26720 IX86_BUILTIN_COPYSIGNQ,
26722 /* Vectorizer support builtins. */
26723 IX86_BUILTIN_CPYSGNPS,
26724 IX86_BUILTIN_CPYSGNPD,
26725 IX86_BUILTIN_CPYSGNPS256,
26726 IX86_BUILTIN_CPYSGNPD256,
26728 /* FMA4 instructions. */
26729 IX86_BUILTIN_VFMADDSS,
26730 IX86_BUILTIN_VFMADDSD,
26731 IX86_BUILTIN_VFMADDPS,
26732 IX86_BUILTIN_VFMADDPD,
26733 IX86_BUILTIN_VFMADDPS256,
26734 IX86_BUILTIN_VFMADDPD256,
26735 IX86_BUILTIN_VFMADDSUBPS,
26736 IX86_BUILTIN_VFMADDSUBPD,
26737 IX86_BUILTIN_VFMADDSUBPS256,
26738 IX86_BUILTIN_VFMADDSUBPD256,
26740 /* FMA3 instructions. */
26741 IX86_BUILTIN_VFMADDSS3,
26742 IX86_BUILTIN_VFMADDSD3,
26744 /* XOP instructions. */
26745 IX86_BUILTIN_VPCMOV,
26746 IX86_BUILTIN_VPCMOV_V2DI,
26747 IX86_BUILTIN_VPCMOV_V4SI,
26748 IX86_BUILTIN_VPCMOV_V8HI,
26749 IX86_BUILTIN_VPCMOV_V16QI,
26750 IX86_BUILTIN_VPCMOV_V4SF,
26751 IX86_BUILTIN_VPCMOV_V2DF,
26752 IX86_BUILTIN_VPCMOV256,
26753 IX86_BUILTIN_VPCMOV_V4DI256,
26754 IX86_BUILTIN_VPCMOV_V8SI256,
26755 IX86_BUILTIN_VPCMOV_V16HI256,
26756 IX86_BUILTIN_VPCMOV_V32QI256,
26757 IX86_BUILTIN_VPCMOV_V8SF256,
26758 IX86_BUILTIN_VPCMOV_V4DF256,
26760 IX86_BUILTIN_VPPERM,
26762 IX86_BUILTIN_VPMACSSWW,
26763 IX86_BUILTIN_VPMACSWW,
26764 IX86_BUILTIN_VPMACSSWD,
26765 IX86_BUILTIN_VPMACSWD,
26766 IX86_BUILTIN_VPMACSSDD,
26767 IX86_BUILTIN_VPMACSDD,
26768 IX86_BUILTIN_VPMACSSDQL,
26769 IX86_BUILTIN_VPMACSSDQH,
26770 IX86_BUILTIN_VPMACSDQL,
26771 IX86_BUILTIN_VPMACSDQH,
26772 IX86_BUILTIN_VPMADCSSWD,
26773 IX86_BUILTIN_VPMADCSWD,
26775 IX86_BUILTIN_VPHADDBW,
26776 IX86_BUILTIN_VPHADDBD,
26777 IX86_BUILTIN_VPHADDBQ,
26778 IX86_BUILTIN_VPHADDWD,
26779 IX86_BUILTIN_VPHADDWQ,
26780 IX86_BUILTIN_VPHADDDQ,
26781 IX86_BUILTIN_VPHADDUBW,
26782 IX86_BUILTIN_VPHADDUBD,
26783 IX86_BUILTIN_VPHADDUBQ,
26784 IX86_BUILTIN_VPHADDUWD,
26785 IX86_BUILTIN_VPHADDUWQ,
26786 IX86_BUILTIN_VPHADDUDQ,
26787 IX86_BUILTIN_VPHSUBBW,
26788 IX86_BUILTIN_VPHSUBWD,
26789 IX86_BUILTIN_VPHSUBDQ,
26791 IX86_BUILTIN_VPROTB,
26792 IX86_BUILTIN_VPROTW,
26793 IX86_BUILTIN_VPROTD,
26794 IX86_BUILTIN_VPROTQ,
26795 IX86_BUILTIN_VPROTB_IMM,
26796 IX86_BUILTIN_VPROTW_IMM,
26797 IX86_BUILTIN_VPROTD_IMM,
26798 IX86_BUILTIN_VPROTQ_IMM,
26800 IX86_BUILTIN_VPSHLB,
26801 IX86_BUILTIN_VPSHLW,
26802 IX86_BUILTIN_VPSHLD,
26803 IX86_BUILTIN_VPSHLQ,
26804 IX86_BUILTIN_VPSHAB,
26805 IX86_BUILTIN_VPSHAW,
26806 IX86_BUILTIN_VPSHAD,
26807 IX86_BUILTIN_VPSHAQ,
26809 IX86_BUILTIN_VFRCZSS,
26810 IX86_BUILTIN_VFRCZSD,
26811 IX86_BUILTIN_VFRCZPS,
26812 IX86_BUILTIN_VFRCZPD,
26813 IX86_BUILTIN_VFRCZPS256,
26814 IX86_BUILTIN_VFRCZPD256,
26816 IX86_BUILTIN_VPCOMEQUB,
26817 IX86_BUILTIN_VPCOMNEUB,
26818 IX86_BUILTIN_VPCOMLTUB,
26819 IX86_BUILTIN_VPCOMLEUB,
26820 IX86_BUILTIN_VPCOMGTUB,
26821 IX86_BUILTIN_VPCOMGEUB,
26822 IX86_BUILTIN_VPCOMFALSEUB,
26823 IX86_BUILTIN_VPCOMTRUEUB,
26825 IX86_BUILTIN_VPCOMEQUW,
26826 IX86_BUILTIN_VPCOMNEUW,
26827 IX86_BUILTIN_VPCOMLTUW,
26828 IX86_BUILTIN_VPCOMLEUW,
26829 IX86_BUILTIN_VPCOMGTUW,
26830 IX86_BUILTIN_VPCOMGEUW,
26831 IX86_BUILTIN_VPCOMFALSEUW,
26832 IX86_BUILTIN_VPCOMTRUEUW,
26834 IX86_BUILTIN_VPCOMEQUD,
26835 IX86_BUILTIN_VPCOMNEUD,
26836 IX86_BUILTIN_VPCOMLTUD,
26837 IX86_BUILTIN_VPCOMLEUD,
26838 IX86_BUILTIN_VPCOMGTUD,
26839 IX86_BUILTIN_VPCOMGEUD,
26840 IX86_BUILTIN_VPCOMFALSEUD,
26841 IX86_BUILTIN_VPCOMTRUEUD,
26843 IX86_BUILTIN_VPCOMEQUQ,
26844 IX86_BUILTIN_VPCOMNEUQ,
26845 IX86_BUILTIN_VPCOMLTUQ,
26846 IX86_BUILTIN_VPCOMLEUQ,
26847 IX86_BUILTIN_VPCOMGTUQ,
26848 IX86_BUILTIN_VPCOMGEUQ,
26849 IX86_BUILTIN_VPCOMFALSEUQ,
26850 IX86_BUILTIN_VPCOMTRUEUQ,
26852 IX86_BUILTIN_VPCOMEQB,
26853 IX86_BUILTIN_VPCOMNEB,
26854 IX86_BUILTIN_VPCOMLTB,
26855 IX86_BUILTIN_VPCOMLEB,
26856 IX86_BUILTIN_VPCOMGTB,
26857 IX86_BUILTIN_VPCOMGEB,
26858 IX86_BUILTIN_VPCOMFALSEB,
26859 IX86_BUILTIN_VPCOMTRUEB,
26861 IX86_BUILTIN_VPCOMEQW,
26862 IX86_BUILTIN_VPCOMNEW,
26863 IX86_BUILTIN_VPCOMLTW,
26864 IX86_BUILTIN_VPCOMLEW,
26865 IX86_BUILTIN_VPCOMGTW,
26866 IX86_BUILTIN_VPCOMGEW,
26867 IX86_BUILTIN_VPCOMFALSEW,
26868 IX86_BUILTIN_VPCOMTRUEW,
26870 IX86_BUILTIN_VPCOMEQD,
26871 IX86_BUILTIN_VPCOMNED,
26872 IX86_BUILTIN_VPCOMLTD,
26873 IX86_BUILTIN_VPCOMLED,
26874 IX86_BUILTIN_VPCOMGTD,
26875 IX86_BUILTIN_VPCOMGED,
26876 IX86_BUILTIN_VPCOMFALSED,
26877 IX86_BUILTIN_VPCOMTRUED,
26879 IX86_BUILTIN_VPCOMEQQ,
26880 IX86_BUILTIN_VPCOMNEQ,
26881 IX86_BUILTIN_VPCOMLTQ,
26882 IX86_BUILTIN_VPCOMLEQ,
26883 IX86_BUILTIN_VPCOMGTQ,
26884 IX86_BUILTIN_VPCOMGEQ,
26885 IX86_BUILTIN_VPCOMFALSEQ,
26886 IX86_BUILTIN_VPCOMTRUEQ,
26888 /* LWP instructions. */
26889 IX86_BUILTIN_LLWPCB,
26890 IX86_BUILTIN_SLWPCB,
26891 IX86_BUILTIN_LWPVAL32,
26892 IX86_BUILTIN_LWPVAL64,
26893 IX86_BUILTIN_LWPINS32,
26894 IX86_BUILTIN_LWPINS64,
26896 IX86_BUILTIN_CLZS,
26898 /* RTM instructions. */
26899 IX86_BUILTIN_XBEGIN,
26900 IX86_BUILTIN_XEND,
26901 IX86_BUILTIN_XABORT,
26902 IX86_BUILTIN_XTEST,
26904 /* BMI instructions. */
26905 IX86_BUILTIN_BEXTR32,
26906 IX86_BUILTIN_BEXTR64,
26907 IX86_BUILTIN_CTZS,
26909 /* TBM instructions. */
26910 IX86_BUILTIN_BEXTRI32,
26911 IX86_BUILTIN_BEXTRI64,
26913 /* BMI2 instructions. */
26914 IX86_BUILTIN_BZHI32,
26915 IX86_BUILTIN_BZHI64,
26916 IX86_BUILTIN_PDEP32,
26917 IX86_BUILTIN_PDEP64,
26918 IX86_BUILTIN_PEXT32,
26919 IX86_BUILTIN_PEXT64,
26921 /* ADX instructions. */
26922 IX86_BUILTIN_ADDCARRYX32,
26923 IX86_BUILTIN_ADDCARRYX64,
26925 /* FSGSBASE instructions. */
26926 IX86_BUILTIN_RDFSBASE32,
26927 IX86_BUILTIN_RDFSBASE64,
26928 IX86_BUILTIN_RDGSBASE32,
26929 IX86_BUILTIN_RDGSBASE64,
26930 IX86_BUILTIN_WRFSBASE32,
26931 IX86_BUILTIN_WRFSBASE64,
26932 IX86_BUILTIN_WRGSBASE32,
26933 IX86_BUILTIN_WRGSBASE64,
26935 /* RDRND instructions. */
26936 IX86_BUILTIN_RDRAND16_STEP,
26937 IX86_BUILTIN_RDRAND32_STEP,
26938 IX86_BUILTIN_RDRAND64_STEP,
26940 /* RDSEED instructions. */
26941 IX86_BUILTIN_RDSEED16_STEP,
26942 IX86_BUILTIN_RDSEED32_STEP,
26943 IX86_BUILTIN_RDSEED64_STEP,
26945 /* F16C instructions. */
26946 IX86_BUILTIN_CVTPH2PS,
26947 IX86_BUILTIN_CVTPH2PS256,
26948 IX86_BUILTIN_CVTPS2PH,
26949 IX86_BUILTIN_CVTPS2PH256,
26951 /* CFString built-in for darwin */
26952 IX86_BUILTIN_CFSTRING,
26954 /* Builtins to get CPU type and supported features. */
26955 IX86_BUILTIN_CPU_INIT,
26956 IX86_BUILTIN_CPU_IS,
26957 IX86_BUILTIN_CPU_SUPPORTS,
26959 IX86_BUILTIN_MAX
26960 };
26962 /* Table for the ix86 builtin decls. */
26963 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
26965 /* Table of all of the builtin functions that are possible with different ISAs
26966 but are waiting to be built until a function is declared to use that
26967 ISA.  */
26968 struct builtin_isa {
26969 const char *name; /* function name */
26970 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
26971 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
26972 bool const_p; /* true if the declaration is constant */
26973 bool set_and_not_built_p;
26974 };
26976 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
26979 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
26980 of which isa_flags to use in the ix86_builtins_isa array. Stores the
26981 function decl in the ix86_builtins array. Returns the function decl or
26982 NULL_TREE if the builtin was not added.
26984 If the front end has a special hook for builtin functions, delay adding
26985 builtin functions that aren't in the current ISA until the ISA is changed
26986 with function specific optimization.  Doing so can save about 300K for the
26987 default compiler.  When the builtin is expanded, check at that time whether
26988 it is valid.
26990 If the front end doesn't have a special hook, record all builtins, even if
26991 they aren't in the current ISA, in case the user uses
26992 function specific options for a different ISA, so that we don't get scope
26993 errors if a builtin is added in the middle of a function scope. */
26995 static inline tree
26996 def_builtin (HOST_WIDE_INT mask, const char *name,
26997 enum ix86_builtin_func_type tcode,
26998 enum ix86_builtins code)
26999 {
27000 tree decl = NULL_TREE;
27002 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
27003 {
27004 ix86_builtins_isa[(int) code].isa = mask;
27006 mask &= ~OPTION_MASK_ISA_64BIT;
27007 if (mask == 0
27008 || (mask & ix86_isa_flags) != 0
27009 || (lang_hooks.builtin_function
27010 == lang_hooks.builtin_function_ext_scope))
27012 {
27013 tree type = ix86_get_builtin_func_type (tcode);
27014 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
27015 NULL, NULL_TREE);
27016 ix86_builtins[(int) code] = decl;
27017 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
27018 }
27019 else
27020 {
27021 ix86_builtins[(int) code] = NULL_TREE;
27022 ix86_builtins_isa[(int) code].tcode = tcode;
27023 ix86_builtins_isa[(int) code].name = name;
27024 ix86_builtins_isa[(int) code].const_p = false;
27025 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
27026 }
27027 }
27029 return decl;
27030 }
27032 /* Like def_builtin, but also marks the function decl "const".  */
27034 static inline tree
27035 def_builtin_const (HOST_WIDE_INT mask, const char *name,
27036 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
27037 {
27038 tree decl = def_builtin (mask, name, tcode, code);
27039 if (decl)
27040 TREE_READONLY (decl) = 1;
27041 else
27042 ix86_builtins_isa[(int) code].const_p = true;
27044 return decl;
27045 }
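/* Usage sketch: the bdesc_* tables below are walked by the builtin
   initialization code later in this file (outside this excerpt) and each
   row is registered through the helpers above.  A minimal sketch, assuming
   the usual shape of ix86_init_mmx_sse_builtins:

     const struct builtin_description *d;
     size_t i;

     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       {
         if (d->name == 0)
           continue;
         def_builtin_const (d->mask, d->name,
                            (enum ix86_builtin_func_type) d->flag, d->code);
       }
*/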
27047 /* Add any new builtin functions for a given ISA that may not have been
27048 declared. This saves a bit of space compared to adding all of the
27049 declarations to the tree, even if we didn't use them. */
27051 static void
27052 ix86_add_new_builtins (HOST_WIDE_INT isa)
27053 {
27054 int i;
27056 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
27057 {
27058 if ((ix86_builtins_isa[i].isa & isa) != 0
27059 && ix86_builtins_isa[i].set_and_not_built_p)
27060 {
27061 tree decl, type;
27063 /* Don't define the builtin again.  */
27064 ix86_builtins_isa[i].set_and_not_built_p = false;
27066 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
27067 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
27068 type, i, BUILT_IN_MD, NULL,
27069 NULL_TREE);
27071 ix86_builtins[i] = decl;
27072 if (ix86_builtins_isa[i].const_p)
27073 TREE_READONLY (decl) = 1;
27074 }
27075 }
27076 }
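/* Usage sketch (the actual caller sits in the target-attribute handling,
   outside this excerpt): once option processing has enabled additional ISA
   bits for a function-specific target, the deferred decls are materialized
   with something like

     ix86_add_new_builtins (ix86_isa_flags);
*/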
27078 /* Bits for builtin_description.flag. */
27080 /* Set when we don't support the comparison natively, and should
27081 swap_comparison in order to support it. */
27082 #define BUILTIN_DESC_SWAP_OPERANDS 1
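/* Illustration of the flag: a builtin for x > y on hardware that only
   provides a native x < y compare can be described by storing LT in the
   COMPARISON field and setting this bit, so the expander emits the LT insn
   with the two input operands swapped.  */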
27084 struct builtin_description
27085 {
27086 const HOST_WIDE_INT mask;
27087 const enum insn_code icode;
27088 const char *const name;
27089 const enum ix86_builtins code;
27090 const enum rtx_code comparison;
27091 const int flag;
27092 };
27094 static const struct builtin_description bdesc_comi[] =
27095 {
27096 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
27097 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
27098 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
27099 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
27100 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
27101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
27102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
27103 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
27104 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
27105 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
27106 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
27107 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
27108 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
27109 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
27110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
27111 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
27112 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
27113 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
27114 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
27115 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
27116 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
27117 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
27118 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
27119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
27120 };
27122 static const struct builtin_description bdesc_pcmpestr[] =
27123 {
27124 /* SSE4.2 */
27125 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
27126 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
27127 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
27128 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
27129 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
27130 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
27131 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
27132 };
27134 static const struct builtin_description bdesc_pcmpistr[] =
27135 {
27136 /* SSE4.2 */
27137 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
27138 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
27139 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
27140 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
27141 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
27142 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
27143 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
27144 };
27146 /* Special builtins with variable number of arguments.  */
27147 static const struct builtin_description bdesc_special_args[] =
27148 {
27149 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
27150 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
27151 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
27153 /* MMX */
27154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
27156 /* 3DNow! */
27157 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
27159 /* FXSR, XSAVE and XSAVEOPT */
27160 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
27161 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
27162 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
27163 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
27164 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
27166 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
27167 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
27168 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
27169 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
27170 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
27172 /* SSE */
27173 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
27174 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
27175 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
27177 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
27178 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
27179 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
27180 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
27182 /* SSE or 3DNow!A */
27183 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
27184 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
27186 /* SSE2 */
27187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
27188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
27189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
27190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
27191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
27192 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
27193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
27194 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
27195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
27196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
27198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
27199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
27201 /* SSE3 */
27202 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
27204 /* SSE4.1 */
27205 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
27207 /* SSE4A */
27208 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
27209 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
27211 /* AVX */
27212 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
27213 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
27215 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
27216 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
27217 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
27218 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
27219 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
27221 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
27222 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
27223 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
27224 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
27225 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
27226 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
27227 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
27229 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
27230 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
27231 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
27233 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
27234 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
27235 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
27236 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
27237 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
27238 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
27239 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
27240 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
27242 /* AVX2 */
27243 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
27244 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
27245 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
27246 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
27247 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
27248 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
27249 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
27250 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
27251 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
27253 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
27254 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
27255 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
27256 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
27257 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
27258 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
27260 /* FSGSBASE */
27261 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
27262 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
27263 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
27264 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
27265 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
27266 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
27267 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
27268 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
27270 /* RTM */
27271 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
27272 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
27273 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
27274 };
27276 /* Builtins with variable number of arguments.  */
27277 static const struct builtin_description bdesc_args[] =
27278 {
27279 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
27280 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
27281 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
27282 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
27283 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
27284 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
27285 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
27287 /* MMX */
27288 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27289 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27290 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27291 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27292 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27293 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27295 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27296 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27297 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27298 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27299 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27300 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27301 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27305 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27308 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27310 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27317 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27322 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27323 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
27324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
27326 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
27327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
27328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
27330 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
27332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
27333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
27334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
27335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
27336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
27337 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
27339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
27340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
27341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
27342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
27343 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
27344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
27346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
27347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
27348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
27349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
27351 /* 3DNow! */
27352 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
27353 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
27354 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
27355 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
27357 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27358 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27359 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27360 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
27361 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
27362 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
27363 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27364 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27365 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27366 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27367 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27368 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27369 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27370 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27371 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27373 /* 3DNow!A */
27374 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
27375 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
27376 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
27377 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
27378 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27379 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
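
  /* SSE2 */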
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
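
  /* SSE2 MMX */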
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
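
  /* SSE3 */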
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
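
  /* SSSE3 */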
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
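
  /* SSE4.1 */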
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
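
  /* SSE4.2 */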
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
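
  /* SSE4A */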
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
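
  /* AES */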
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
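
  /* PCLMUL */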
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
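
  /* AVX */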
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
27887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
27888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
27889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
27890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
27892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
27893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
27895 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
27896 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
27898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
27904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
27905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
27906 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
27907 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
27908 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
27910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
27911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
27912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
27913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
27914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
27915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
27916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
27917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
27918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
27919 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
27920 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
27921 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
27922 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
27923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
27924 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
27926 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
27927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
27929 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27930 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
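
  /* Editorial note (not in the original sources): each row above binds a
     __builtin_ia32_* name to an insn pattern and a function-type code;
     the user-visible AVX intrinsics are thin wrappers over these names.
     As a hedged sketch of how one row is consumed (the real wrapper
     lives in avxintrin.h, not here), the CMPPD256 entry backs
     _mm256_cmp_pd roughly like this:

       __m256d lt = (__m256d) __builtin_ia32_cmppd256 ((__v4df) a,
                                                       (__v4df) b,
                                                       _CMP_LT_OQ);

     The trailing INT operand must be a compile-time constant; the
     expander enforces that when the builtin is expanded.  */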

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
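
  /* Editorial note on the type-code suffixes above: _COUNT marks a shift
     whose count operand may be either a plain integer or a vector holding
     the count, and _CONVERT marks rows whose insn pattern operates in a
     different mode than the builtin's declared type -- e.g. the
     palignr256 and pslldqi256/psrldqi256 rows drive V2TI-mode patterns
     behind V4DI-typed builtins, with the expander inserting the mode
     punning.  */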

  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

  /* BMI2 */
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
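
/* Editorial note (a sketch, not from the original sources): tables like
   the one just closed and bdesc_multi_arg below are walked once at
   builtin-initialization time, roughly:

     const struct builtin_description *d;
     size_t i;

     for (i = 0, d = bdesc_multi_arg;
          i < ARRAY_SIZE (bdesc_multi_arg);
          i++, d++)
       if (d->name)
         def_builtin_const (d->mask, d->name,
                            (enum ix86_builtin_func_type) d->flag, d->code);

   The ISA gating -- only making a builtin usable when a bit of d->mask is
   set in the active ix86_isa_flags -- happens inside the def_builtin
   helpers; see ix86_init_mmx_sse_builtins for the real loops.  */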
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
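
/* Editorial note: the MULTI_ARG_<N>_<ELEM> macros above are shorthand for
   ix86_builtin_func_type values, where <N> is the operand count, <ELEM>
   the element mode, a trailing "2" marks the 256-bit form, and suffixes
   such as _IMM, _CMP and _TF appear to mark an immediate, comparison, or
   always-false/always-true condition operand.  For example:

     MULTI_ARG_3_SF     -> V4SF_FTYPE_V4SF_V4SF_V4SF  (128-bit, 3 operands)
     MULTI_ARG_3_SF2    -> V8SF_FTYPE_V8SF_V8SF_V8SF  (256-bit counterpart)
     MULTI_ARG_2_DI_IMM -> V2DI_FTYPE_V2DI_SI         (vector op, int immediate)  */
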
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },
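
  /* Editorial note: the OR'd masks above let one row serve both ISAs;
     the builtin becomes available when either OPTION_MASK_ISA_FMA or
     OPTION_MASK_ISA_FMA4 is enabled, and the shared expander emits
     whichever encoding the active ISA provides.  */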

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
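
  /* Editorial note: for the vpcom* rows the fourth field is not UNKNOWN
     but an rtx comparison code (EQ, NE, LT, LTU, ...); the expander uses
     it to build the comparison for the single maskcmp pattern shared by
     each group.  The *neq* spellings are deliberate aliases: they reuse
     the same IX86_BUILTIN_VPCOMNE* enumerator as the *ne* names.  */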

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};

28364 /* TM vector builtins. */
28366 /* Reuse the existing x86-specific `struct builtin_description' because
28367 we're lazy. Add casts to make them fit. */
28368 static const struct builtin_description bdesc_tm[] =
28370 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
28371 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
28372 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
28373 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
28374 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
28375 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
28376 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
28378 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
28379 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
28380 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
28381 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
28382 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
28383 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
28384 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
28386 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
28387 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
28388 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
28389 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
28390 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
28391 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
28392 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
28394 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
28395 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
28396 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
28399 /* TM callbacks. */
28401 /* Return the builtin decl needed to load a vector of TYPE. */
28404 ix86_builtin_tm_load (tree type)
28406 if (TREE_CODE (type) == VECTOR_TYPE)
28408 switch (tree_low_cst (TYPE_SIZE (type), 1))
28411 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
28413 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
28415 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
28421 /* Return the builtin decl needed to store a vector of TYPE. */
28424 ix86_builtin_tm_store (tree type)
28426 if (TREE_CODE (type) == VECTOR_TYPE)
28428 switch (tree_low_cst (TYPE_SIZE (type), 1))
28431 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
28433 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
28435 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
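/* An illustrative sketch (not from the GCC sources): both hooks above
   key off TYPE_SIZE in bits, so a 16-byte vector type such as V4SF
   resolves to the M128 variants:

     given tree t with tree_low_cst (TYPE_SIZE (t), 1) == 128,
       ix86_builtin_tm_load (t)  yields the BUILT_IN_TM_LOAD_M128 decl,
       ix86_builtin_tm_store (t) yields the BUILT_IN_TM_STORE_M128 decl.  */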
28441 /* Initialize the transactional memory vector load/store builtins. */
28444 ix86_init_tm_builtins (void)
28446 enum ix86_builtin_func_type ftype;
28447 const struct builtin_description *d;
28450 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
28451 tree attrs_log, attrs_type_log;
28456 /* If there are no builtins defined, we must be compiling in a
28457 language without trans-mem support. */
28458 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
28461 /* Use whatever attributes a normal TM load has. */
28462 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
28463 attrs_load = DECL_ATTRIBUTES (decl);
28464 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
28465 /* Use whatever attributes a normal TM store has. */
28466 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
28467 attrs_store = DECL_ATTRIBUTES (decl);
28468 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
28469 /* Use whatever attributes a normal TM log has. */
28470 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
28471 attrs_log = DECL_ATTRIBUTES (decl);
28472 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
28474 for (i = 0, d = bdesc_tm;
28475 i < ARRAY_SIZE (bdesc_tm);
28478 if ((d->mask & ix86_isa_flags) != 0
28479 || (lang_hooks.builtin_function
28480 == lang_hooks.builtin_function_ext_scope))
28482 tree type, attrs, attrs_type;
28483 enum built_in_function code = (enum built_in_function) d->code;
28485 ftype = (enum ix86_builtin_func_type) d->flag;
28486 type = ix86_get_builtin_func_type (ftype);
28488 if (BUILTIN_TM_LOAD_P (code))
28490 attrs = attrs_load;
28491 attrs_type = attrs_type_load;
28493 else if (BUILTIN_TM_STORE_P (code))
28495 attrs = attrs_store;
28496 attrs_type = attrs_type_store;
28501 attrs_type = attrs_type_log;
28503 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
28504 /* The builtin without the prefix for
28505 calling it directly. */
28506 d->name + strlen ("__builtin_"),
28508 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
28509 set the TYPE_ATTRIBUTES. */
28510 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
28512 set_builtin_decl (code, decl, false);
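/* For example, the bdesc_tm entry named "__builtin__ITM_WM128" is
   registered here under BUILT_IN_TM_STORE_M128, and the
   d->name + strlen ("__builtin_") argument makes the same function
   callable directly as _ITM_WM128.  */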
28517 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
28518 in the current target ISA to allow the user to compile particular modules
28519 with different target specific options that differ from the command line options. */
28522 ix86_init_mmx_sse_builtins (void)
28524 const struct builtin_description * d;
28525 enum ix86_builtin_func_type ftype;
28528 /* Add all special builtins with variable number of operands. */
28529 for (i = 0, d = bdesc_special_args;
28530 i < ARRAY_SIZE (bdesc_special_args);
28536 ftype = (enum ix86_builtin_func_type) d->flag;
28537 def_builtin (d->mask, d->name, ftype, d->code);
28540 /* Add all builtins with variable number of operands. */
28541 for (i = 0, d = bdesc_args;
28542 i < ARRAY_SIZE (bdesc_args);
28548 ftype = (enum ix86_builtin_func_type) d->flag;
28549 def_builtin_const (d->mask, d->name, ftype, d->code);
28552 /* pcmpestr[im] insns. */
28553 for (i = 0, d = bdesc_pcmpestr;
28554 i < ARRAY_SIZE (bdesc_pcmpestr);
28557 if (d->code == IX86_BUILTIN_PCMPESTRM128)
28558 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
28560 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
28561 def_builtin_const (d->mask, d->name, ftype, d->code);
28564 /* pcmpistr[im] insns. */
28565 for (i = 0, d = bdesc_pcmpistr;
28566 i < ARRAY_SIZE (bdesc_pcmpistr);
28569 if (d->code == IX86_BUILTIN_PCMPISTRM128)
28570 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
28572 ftype = INT_FTYPE_V16QI_V16QI_INT;
28573 def_builtin_const (d->mask, d->name, ftype, d->code);
28576 /* comi/ucomi insns. */
28577 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
28579 if (d->mask == OPTION_MASK_ISA_SSE2)
28580 ftype = INT_FTYPE_V2DF_V2DF;
28582 ftype = INT_FTYPE_V4SF_V4SF;
28583 def_builtin_const (d->mask, d->name, ftype, d->code);
28587 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
28588 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
28589 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
28590 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
28592 /* SSE or 3DNow!A */
28593 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
28594 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
28595 IX86_BUILTIN_MASKMOVQ);
28598 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
28599 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
28601 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
28602 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
28603 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
28604 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
28607 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
28608 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
28609 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
28610 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
28613 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
28614 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
28615 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
28616 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
28617 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
28618 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
28619 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
28620 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
28621 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
28622 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
28623 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
28624 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
28627 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
28628 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
28631 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
28632 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
28633 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
28634 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
28635 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
28636 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
28637 IX86_BUILTIN_RDRAND64_STEP);
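/* A minimal usage sketch (hypothetical user code): each *_step builtin
   returns nonzero on success and stores the random value through its
   pointer argument, so callers typically retry in a loop:

     unsigned int r;
     while (!__builtin_ia32_rdrand32_step (&r))
       ;  (retry until the hardware RNG reports success)  */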
28640 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
28641 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
28642 IX86_BUILTIN_GATHERSIV2DF);
28644 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
28645 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
28646 IX86_BUILTIN_GATHERSIV4DF);
28648 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
28649 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
28650 IX86_BUILTIN_GATHERDIV2DF);
28652 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
28653 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
28654 IX86_BUILTIN_GATHERDIV4DF);
28656 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
28657 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
28658 IX86_BUILTIN_GATHERSIV4SF);
28660 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
28661 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
28662 IX86_BUILTIN_GATHERSIV8SF);
28664 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
28665 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
28666 IX86_BUILTIN_GATHERDIV4SF);
28668 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
28669 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
28670 IX86_BUILTIN_GATHERDIV8SF);
28672 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
28673 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
28674 IX86_BUILTIN_GATHERSIV2DI);
28676 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
28677 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
28678 IX86_BUILTIN_GATHERSIV4DI);
28680 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
28681 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
28682 IX86_BUILTIN_GATHERDIV2DI);
28684 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
28685 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
28686 IX86_BUILTIN_GATHERDIV4DI);
28688 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
28689 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
28690 IX86_BUILTIN_GATHERSIV4SI);
28692 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
28693 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
28694 IX86_BUILTIN_GATHERSIV8SI);
28696 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
28697 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
28698 IX86_BUILTIN_GATHERDIV4SI);
28700 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
28701 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
28702 IX86_BUILTIN_GATHERDIV8SI);
28704 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
28705 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
28706 IX86_BUILTIN_GATHERALTSIV4DF);
28708 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
28709 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
28710 IX86_BUILTIN_GATHERALTDIV8SF);
28712 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
28713 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
28714 IX86_BUILTIN_GATHERALTSIV4DI);
28716 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
28717 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
28718 IX86_BUILTIN_GATHERALTDIV8SI);
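/* These builtins back the AVX2 gather intrinsics in avx2intrin.h; for
   instance, _mm_i32gather_pd (base, index, scale) is expected to expand
   through __builtin_ia32_gathersiv2df with an all-ones mask.  See
   avx2intrin.h for the exact wrappers.  */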
28721 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
28722 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
28724 /* MMX access to the vec_init patterns. */
28725 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
28726 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
28728 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
28729 V4HI_FTYPE_HI_HI_HI_HI,
28730 IX86_BUILTIN_VEC_INIT_V4HI);
28732 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
28733 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
28734 IX86_BUILTIN_VEC_INIT_V8QI);
28736 /* Access to the vec_extract patterns. */
28737 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
28738 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
28739 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
28740 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
28741 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
28742 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
28743 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
28744 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
28745 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
28746 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
28748 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
28749 "__builtin_ia32_vec_ext_v4hi",
28750 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
28752 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
28753 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
28755 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
28756 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
28758 /* Access to the vec_set patterns. */
28759 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
28760 "__builtin_ia32_vec_set_v2di",
28761 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
28763 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
28764 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
28766 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
28767 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
28769 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
28770 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
28772 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
28773 "__builtin_ia32_vec_set_v4hi",
28774 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
28776 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
28777 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
28780 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
28781 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
28782 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
28783 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
28784 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
28785 "__builtin_ia32_rdseed_di_step",
28786 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
28789 def_builtin (0, "__builtin_ia32_addcarryx_u32",
28790 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
28791 def_builtin (OPTION_MASK_ISA_64BIT,
28792 "__builtin_ia32_addcarryx_u64",
28793 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
28794 IX86_BUILTIN_ADDCARRYX64);
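/* Usage sketch (hypothetical user code): the builtin takes a carry-in,
   two addends and a pointer for the sum, and returns the carry-out, so
   multi-word additions chain naturally:

     unsigned int lo, hi;
     unsigned char c;
     c = __builtin_ia32_addcarryx_u32 (0, a0, b0, &lo);
     (void) __builtin_ia32_addcarryx_u32 (c, a1, b1, &hi);  */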
28796 /* Add FMA4 multi-arg instructions. */
28797 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
28802 ftype = (enum ix86_builtin_func_type) d->flag;
28803 def_builtin_const (d->mask, d->name, ftype, d->code);
28807 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
28808 to return a pointer to VERSION_DECL if the outcome of the expression
28809 formed by PREDICATE_CHAIN is true. This function will be called during
28810 version dispatch to decide which function version to execute. It returns
28811 the basic block at the end, to which more conditions can be added. */
28814 add_condition_to_bb (tree function_decl, tree version_decl,
28815 tree predicate_chain, basic_block new_bb)
28817 gimple return_stmt;
28818 tree convert_expr, result_var;
28819 gimple convert_stmt;
28820 gimple call_cond_stmt;
28821 gimple if_else_stmt;
28823 basic_block bb1, bb2, bb3;
28826 tree cond_var, and_expr_var = NULL_TREE;
28829 tree predicate_decl, predicate_arg;
28831 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
28833 gcc_assert (new_bb != NULL);
28834 gseq = bb_seq (new_bb);
28837 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
28838 build_fold_addr_expr (version_decl));
28839 result_var = create_tmp_var (ptr_type_node, NULL);
28840 convert_stmt = gimple_build_assign (result_var, convert_expr);
28841 return_stmt = gimple_build_return (result_var);
28843 if (predicate_chain == NULL_TREE)
28845 gimple_seq_add_stmt (&gseq, convert_stmt);
28846 gimple_seq_add_stmt (&gseq, return_stmt);
28847 set_bb_seq (new_bb, gseq);
28848 gimple_set_bb (convert_stmt, new_bb);
28849 gimple_set_bb (return_stmt, new_bb);
28854 while (predicate_chain != NULL)
28856 cond_var = create_tmp_var (integer_type_node, NULL);
28857 predicate_decl = TREE_PURPOSE (predicate_chain);
28858 predicate_arg = TREE_VALUE (predicate_chain);
28859 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
28860 gimple_call_set_lhs (call_cond_stmt, cond_var);
28862 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
28863 gimple_set_bb (call_cond_stmt, new_bb);
28864 gimple_seq_add_stmt (&gseq, call_cond_stmt);
28866 predicate_chain = TREE_CHAIN (predicate_chain);
28868 if (and_expr_var == NULL)
28869 and_expr_var = cond_var;
28872 gimple assign_stmt;
28873 /* Use MIN_EXPR to check if any integer is zero:
28874 and_expr_var = min_expr <cond_var, and_expr_var>. */
28875 assign_stmt = gimple_build_assign (and_expr_var,
28876 build2 (MIN_EXPR, integer_type_node,
28877 cond_var, and_expr_var));
28879 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
28880 gimple_set_bb (assign_stmt, new_bb);
28881 gimple_seq_add_stmt (&gseq, assign_stmt);
28885 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
28887 NULL_TREE, NULL_TREE);
28888 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
28889 gimple_set_bb (if_else_stmt, new_bb);
28890 gimple_seq_add_stmt (&gseq, if_else_stmt);
28892 gimple_seq_add_stmt (&gseq, convert_stmt);
28893 gimple_seq_add_stmt (&gseq, return_stmt);
28894 set_bb_seq (new_bb, gseq);
28897 e12 = split_block (bb1, if_else_stmt);
28899 e12->flags &= ~EDGE_FALLTHRU;
28900 e12->flags |= EDGE_TRUE_VALUE;
28902 e23 = split_block (bb2, return_stmt);
28904 gimple_set_bb (convert_stmt, bb2);
28905 gimple_set_bb (return_stmt, bb2);
28908 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
28911 make_edge (bb2, EXIT_BLOCK_PTR, 0);
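/* The code built above has roughly this shape per version (an
   illustrative sketch of the generated GIMPLE):

     cond_1 = __builtin_cpu_is ("...");         predicate call(s)
     and_1 = MIN_EXPR <cond_1, and_0>;          chained predicates
     if (and_1 > 0) goto bb2; else goto bb3;
   bb2:
     return (void *) &version_decl;
   bb3:
     test for the next version...  */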
28918 /* This parses the attribute arguments to target in DECL and determines
28919 the right builtin to use to match the platform specification.
28920 It returns the priority value for this version decl. If PREDICATE_LIST
28921 is not NULL, it stores the list of cpu features that need to be checked
28922 before dispatching this function. */
28924 static unsigned int
28925 get_builtin_code_for_version (tree decl, tree *predicate_list)
28928 struct cl_target_option cur_target;
28930 struct cl_target_option *new_target;
28931 const char *arg_str = NULL;
28932 const char *attrs_str = NULL;
28933 char *tok_str = NULL;
28936 /* Priority of i386 features, greater value is higher priority. This is
28937 used to decide the order in which function dispatch must happen. For
28938 instance, a version specialized for SSE4.2 should be checked for dispatch
28939 before a version for SSE3, as SSE4.2 implies SSE3. */
28940 enum feature_priority
28961 enum feature_priority priority = P_ZERO;
28963 /* These are the target attribute strings for which a dispatcher is
28964 available, from fold_builtin_cpu. */
28966 static struct _feature_list
28968 const char *const name;
28969 const enum feature_priority priority;
28971 const feature_list[] =
28977 {"ssse3", P_SSSE3},
28978 {"sse4.1", P_SSE4_1},
28979 {"sse4.2", P_SSE4_2},
28980 {"popcnt", P_POPCNT},
28986 static unsigned int NUM_FEATURES
28987 = sizeof (feature_list) / sizeof (struct _feature_list);
28991 tree predicate_chain = NULL_TREE;
28992 tree predicate_decl, predicate_arg;
28994 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
28995 gcc_assert (attrs != NULL);
28997 attrs = TREE_VALUE (TREE_VALUE (attrs));
28999 gcc_assert (TREE_CODE (attrs) == STRING_CST);
29000 attrs_str = TREE_STRING_POINTER (attrs);
29002 /* Return priority zero for default function. */
29003 if (strcmp (attrs_str, "default") == 0)
29006 /* Handle arch= if specified. For priority, set it to be 1 more than
29007 the best instruction set the processor can handle. For instance, if
29008 there is a version for atom and a version for ssse3 (the highest ISA
29009 priority for atom), the atom version must be checked for dispatch
29010 before the ssse3 version. */
29011 if (strstr (attrs_str, "arch=") != NULL)
29013 cl_target_option_save (&cur_target, &global_options);
29014 target_node = ix86_valid_target_attribute_tree (attrs);
29016 gcc_assert (target_node);
29017 new_target = TREE_TARGET_OPTION (target_node);
29018 gcc_assert (new_target);
29020 if (new_target->arch_specified && new_target->arch > 0)
29022 switch (new_target->arch)
29024 case PROCESSOR_CORE2:
29026 priority = P_PROC_SSSE3;
29028 case PROCESSOR_COREI7:
29029 arg_str = "corei7";
29030 priority = P_PROC_SSE4_2;
29032 case PROCESSOR_ATOM:
29034 priority = P_PROC_SSSE3;
29036 case PROCESSOR_AMDFAM10:
29037 arg_str = "amdfam10h";
29038 priority = P_PROC_SSE4_a;
29040 case PROCESSOR_BDVER1:
29041 arg_str = "bdver1";
29042 priority = P_PROC_FMA;
29044 case PROCESSOR_BDVER2:
29045 arg_str = "bdver2";
29046 priority = P_PROC_FMA;
29051 cl_target_option_restore (&global_options, &cur_target);
29053 if (predicate_list && arg_str == NULL)
29055 error_at (DECL_SOURCE_LOCATION (decl),
29056 "No dispatcher found for the versioning attributes");
29060 if (predicate_list)
29062 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
29063 /* For a C string literal the length includes the trailing NULL. */
29064 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
29065 predicate_chain = tree_cons (predicate_decl, predicate_arg,
29070 /* Process feature name. */
29071 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
29072 strcpy (tok_str, attrs_str);
29073 token = strtok (tok_str, ",");
29074 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
29076 while (token != NULL)
29078 /* Do not process "arch=" */
29079 if (strncmp (token, "arch=", 5) == 0)
29081 token = strtok (NULL, ",");
29084 for (i = 0; i < NUM_FEATURES; ++i)
29086 if (strcmp (token, feature_list[i].name) == 0)
29088 if (predicate_list)
29090 predicate_arg = build_string_literal (
29091 strlen (feature_list[i].name) + 1,
29092 feature_list[i].name);
29093 predicate_chain = tree_cons (predicate_decl, predicate_arg,
29096 /* Find the maximum priority feature. */
29097 if (feature_list[i].priority > priority)
29098 priority = feature_list[i].priority;
29103 if (predicate_list && i == NUM_FEATURES)
29105 error_at (DECL_SOURCE_LOCATION (decl),
29106 "No dispatcher found for %s", token);
29109 token = strtok (NULL, ",");
29113 if (predicate_list && predicate_chain == NULL_TREE)
29115 error_at (DECL_SOURCE_LOCATION (decl),
29116 "No dispatcher found for the versioning attributes : %s",
29120 else if (predicate_list)
29122 predicate_chain = nreverse (predicate_chain);
29123 *predicate_list = predicate_chain;
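/* For example (hypothetical user code), given

     __attribute__ ((target ("default")))      int foo (void);
     __attribute__ ((target ("sse4.2")))       int foo (void);
     __attribute__ ((target ("arch=corei7")))  int foo (void);

   this function assigns priorities P_ZERO, P_SSE4_2 and P_PROC_SSE4_2
   respectively, so the corei7 version is checked for dispatch before
   the plain sse4.2 one and the default is checked last.  */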
29129 /* This compares the priority of target features in function DECL1
29130 and DECL2. It returns positive value if DECL1 is higher priority,
29131 negative value if DECL2 is higher priority and 0 if they are the
29135 ix86_compare_version_priority (tree decl1, tree decl2)
29137 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
29138 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
29140 return (int)priority1 - (int)priority2;
29143 /* V1 and V2 point to function versions with different priorities
29144 based on the target ISA. This qsort comparator orders them by descending dispatch priority. */
29147 feature_compare (const void *v1, const void *v2)
29149 typedef struct _function_version_info
29152 tree predicate_chain;
29153 unsigned int dispatch_priority;
29154 } function_version_info;
29156 const function_version_info c1 = *(const function_version_info *)v1;
29157 const function_version_info c2 = *(const function_version_info *)v2;
29158 return (c2.dispatch_priority - c1.dispatch_priority);
29161 /* This function generates the dispatch function for
29162 multi-versioned functions. DISPATCH_DECL is the function which will
29163 contain the dispatch logic. FNDECLS are the function choices for
29164 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
29165 in DISPATCH_DECL in which the dispatch code is generated. */
29168 dispatch_function_versions (tree dispatch_decl,
29170 basic_block *empty_bb)
29173 gimple ifunc_cpu_init_stmt;
29177 vec<tree> *fndecls;
29178 unsigned int num_versions = 0;
29179 unsigned int actual_versions = 0;
29182 struct _function_version_info
29185 tree predicate_chain;
29186 unsigned int dispatch_priority;
29187 }*function_version_info;
29189 gcc_assert (dispatch_decl != NULL
29190 && fndecls_p != NULL
29191 && empty_bb != NULL);
29193 /* fndecls_p is actually a vector. */
29194 fndecls = static_cast<vec<tree> *> (fndecls_p);
29196 /* At least one more version other than the default. */
29197 num_versions = fndecls->length ();
29198 gcc_assert (num_versions >= 2);
29200 function_version_info = (struct _function_version_info *)
29201 XNEWVEC (struct _function_version_info, (num_versions - 1));
29203 /* The first version in the vector is the default decl. */
29204 default_decl = (*fndecls)[0];
29206 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
29208 gseq = bb_seq (*empty_bb);
29209 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
29210 constructors, so explicitly call __builtin_cpu_init here. */
29211 ifunc_cpu_init_stmt = gimple_build_call_vec (
29212 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
29213 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
29214 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
29215 set_bb_seq (*empty_bb, gseq);
29220 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
29222 tree version_decl = ele;
29223 tree predicate_chain = NULL_TREE;
29224 unsigned int priority;
29225 /* Get attribute string, parse it and find the right predicate decl.
29226 The predicate function could be a lengthy combination of many
29227 features, like arch-type and various isa-variants. */
29228 priority = get_builtin_code_for_version (version_decl,
29231 if (predicate_chain == NULL_TREE)
29235 function_version_info [ix - 1].version_decl = version_decl;
29236 function_version_info [ix - 1].predicate_chain = predicate_chain;
29237 function_version_info [ix - 1].dispatch_priority = priority;
29240 /* Sort the versions according to descending order of dispatch priority. The
29241 priority is based on the ISA. This is not a perfect solution. There
29242 could still be ambiguity. If more than one function version is suitable
29243 to execute, which one should be dispatched? In future, allow the user
29244 to specify a dispatch priority next to the version. */
29245 qsort (function_version_info, actual_versions,
29246 sizeof (struct _function_version_info), feature_compare);
29248 for (i = 0; i < actual_versions; ++i)
29249 *empty_bb = add_condition_to_bb (dispatch_decl,
29250 function_version_info[i].version_decl,
29251 function_version_info[i].predicate_chain,
29254 /* Dispatch the default version at the end. */
29255 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
29258 free (function_version_info);
29262 /* Comparator function to be used in the qsort routine to sort attribute
29263 specification strings to "target". */
29266 attr_strcmp (const void *v1, const void *v2)
29268 const char *c1 = *(char *const*)v1;
29269 const char *c2 = *(char *const*)v2;
29270 return strcmp (c1, c2);
29273 /* ARGLIST is the argument to target attribute. This function tokenizes
29274 the comma separated arguments, sorts them and returns a string which
29275 is a unique identifier for the comma separated arguments. It also
29276 replaces non-identifier characters "=,-" with "_". */
29279 sorted_attr_string (tree arglist)
29282 size_t str_len_sum = 0;
29283 char **args = NULL;
29284 char *attr_str, *ret_str;
29286 unsigned int argnum = 1;
29289 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
29291 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
29292 size_t len = strlen (str);
29293 str_len_sum += len + 1;
29294 if (arg != arglist)
29296 for (i = 0; i < strlen (str); i++)
29301 attr_str = XNEWVEC (char, str_len_sum);
29303 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
29305 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
29306 size_t len = strlen (str);
29307 memcpy (attr_str + str_len_sum, str, len);
29308 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
29309 str_len_sum += len + 1;
29312 /* Replace "=,-" with "_". */
29313 for (i = 0; i < strlen (attr_str); i++)
29314 if (attr_str[i] == '=' || attr_str[i]== '-')
29320 args = XNEWVEC (char *, argnum);
29323 attr = strtok (attr_str, ",");
29324 while (attr != NULL)
29328 attr = strtok (NULL, ",");
29331 qsort (args, argnum, sizeof (char *), attr_strcmp);
29333 ret_str = XNEWVEC (char, str_len_sum);
29335 for (i = 0; i < argnum; i++)
29337 size_t len = strlen (args[i]);
29338 memcpy (ret_str + str_len_sum, args[i], len);
29339 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
29340 str_len_sum += len + 1;
29344 XDELETEVEC (attr_str);
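/* For instance, target ("sse4.2,arch=atom") and
   target ("arch=atom,sse4.2") both canonicalize to the same string,
   "arch_atom_sse4.2" (an illustrative example: '=' and '-' become '_'
   and the sorted tokens are joined with '_').  */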
29348 /* This function changes the assembler name for functions that are
29349 versions. If DECL is a function version and has a "target"
29350 attribute, it appends the attribute string to its assembler name. */
29353 ix86_mangle_function_version_assembler_name (tree decl, tree id)
29356 const char *orig_name, *version_string;
29357 char *attr_str, *assembler_name;
29359 if (DECL_DECLARED_INLINE_P (decl)
29360 && lookup_attribute ("gnu_inline",
29361 DECL_ATTRIBUTES (decl)))
29362 error_at (DECL_SOURCE_LOCATION (decl),
29363 "Function versions cannot be marked as gnu_inline,"
29364 " bodies have to be generated");
29366 if (DECL_VIRTUAL_P (decl)
29367 || DECL_VINDEX (decl))
29368 sorry ("Virtual function multiversioning not supported");
29370 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
29372 /* The target attribute string cannot be NULL. */
29373 gcc_assert (version_attr != NULL_TREE);
29375 orig_name = IDENTIFIER_POINTER (id);
29377 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
29379 if (strcmp (version_string, "default") == 0)
29382 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
29383 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
29385 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
29387 /* Allow assembler name to be modified if already set. */
29388 if (DECL_ASSEMBLER_NAME_SET_P (decl))
29389 SET_DECL_RTL (decl, NULL);
29391 tree ret = get_identifier (assembler_name);
29392 XDELETEVEC (attr_str);
29393 XDELETEVEC (assembler_name);
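/* Illustratively, a version of foo declared with
   __attribute__ ((target ("avx"))) gets the assembler name "foo.avx",
   while the "default" version keeps the unsuffixed name.  */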
29397 /* This function returns true if FN1 and FN2 are versions of the same function,
29398 that is, the target strings of the function decls are different. This assumes
29399 that FN1 and FN2 have the same signature. */
29402 ix86_function_versions (tree fn1, tree fn2)
29405 char *target1, *target2;
29408 if (TREE_CODE (fn1) != FUNCTION_DECL
29409 || TREE_CODE (fn2) != FUNCTION_DECL)
29412 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
29413 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
29415 /* At least one function decl should have the target attribute specified. */
29416 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
29419 /* Diagnose missing target attribute if one of the decls is already
29420 multi-versioned. */
29421 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
29423 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
29425 if (attr2 != NULL_TREE)
29432 error_at (DECL_SOURCE_LOCATION (fn2),
29433 "missing %<target%> attribute for multi-versioned %D",
29435 error_at (DECL_SOURCE_LOCATION (fn1),
29436 "previous declaration of %D", fn1);
29437 /* Prevent diagnosing of the same error multiple times. */
29438 DECL_ATTRIBUTES (fn2)
29439 = tree_cons (get_identifier ("target"),
29440 copy_node (TREE_VALUE (attr1)),
29441 DECL_ATTRIBUTES (fn2));
29446 target1 = sorted_attr_string (TREE_VALUE (attr1));
29447 target2 = sorted_attr_string (TREE_VALUE (attr2));
29449 /* The sorted target strings must be different for fn1 and fn2 to be versions. */
29451 if (strcmp (target1, target2) == 0)
29456 XDELETEVEC (target1);
29457 XDELETEVEC (target2);
29463 ix86_mangle_decl_assembler_name (tree decl, tree id)
29465 /* For function version, add the target suffix to the assembler name. */
29466 if (TREE_CODE (decl) == FUNCTION_DECL
29467 && DECL_FUNCTION_VERSIONED (decl))
29468 id = ix86_mangle_function_version_assembler_name (decl, id);
29469 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
29470 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
29476 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
29477 is true, append the full path name of the source file. */
29480 make_name (tree decl, const char *suffix, bool make_unique)
29482 char *global_var_name;
29485 const char *unique_name = NULL;
29487 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
29489 /* Get a unique name that can be used globally without any chances
29490 of collision at link time. */
29492 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
29494 name_len = strlen (name) + strlen (suffix) + 2;
29497 name_len += strlen (unique_name) + 1;
29498 global_var_name = XNEWVEC (char, name_len);
29500 /* Use '.' to concatenate names as it is demangler friendly. */
29502 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
29505 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
29507 return global_var_name;
29510 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29512 /* Make a dispatcher declaration for the multi-versioned function DECL.
29513 Calls to DECL function will be replaced with calls to the dispatcher
29514 by the front-end. Return the decl created. */
29517 make_dispatcher_decl (const tree decl)
29521 tree fn_type, func_type;
29522 bool is_uniq = false;
29524 if (TREE_PUBLIC (decl) == 0)
29527 func_name = make_name (decl, "ifunc", is_uniq);
29529 fn_type = TREE_TYPE (decl);
29530 func_type = build_function_type (TREE_TYPE (fn_type),
29531 TYPE_ARG_TYPES (fn_type));
29533 func_decl = build_fn_decl (func_name, func_type);
29534 XDELETEVEC (func_name);
29535 TREE_USED (func_decl) = 1;
29536 DECL_CONTEXT (func_decl) = NULL_TREE;
29537 DECL_INITIAL (func_decl) = error_mark_node;
29538 DECL_ARTIFICIAL (func_decl) = 1;
29539 /* Mark this func as external, the resolver will flip it again if
29540 it gets generated. */
29541 DECL_EXTERNAL (func_decl) = 1;
29542 /* IFUNCs have to be externally visible. */
29543 TREE_PUBLIC (func_decl) = 1;
29550 /* Returns true if decl is multi-versioned and DECL is the default function,
29551 that is, it is not tagged with a target-specific optimization. */
29554 is_function_default_version (const tree decl)
29556 if (TREE_CODE (decl) != FUNCTION_DECL
29557 || !DECL_FUNCTION_VERSIONED (decl))
29559 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
29561 attr = TREE_VALUE (TREE_VALUE (attr));
29562 return (TREE_CODE (attr) == STRING_CST
29563 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
29566 /* Make a dispatcher declaration for the multi-versioned function DECL.
29567 Calls to DECL function will be replaced with calls to the dispatcher
29568 by the front-end. Returns the decl of the dispatcher function. */
29571 ix86_get_function_versions_dispatcher (void *decl)
29573 tree fn = (tree) decl;
29574 struct cgraph_node *node = NULL;
29575 struct cgraph_node *default_node = NULL;
29576 struct cgraph_function_version_info *node_v = NULL;
29577 struct cgraph_function_version_info *first_v = NULL;
29579 tree dispatch_decl = NULL;
29581 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29582 struct cgraph_function_version_info *it_v = NULL;
29583 struct cgraph_node *dispatcher_node = NULL;
29584 struct cgraph_function_version_info *dispatcher_version_info = NULL;
29587 struct cgraph_function_version_info *default_version_info = NULL;
29589 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
29591 node = cgraph_get_node (fn);
29592 gcc_assert (node != NULL);
29594 node_v = get_cgraph_node_version (node);
29595 gcc_assert (node_v != NULL);
29597 if (node_v->dispatcher_resolver != NULL)
29598 return node_v->dispatcher_resolver;
29600 /* Find the default version and make it the first node. */
29602 /* Go to the beginning of the chain. */
29603 while (first_v->prev != NULL)
29604 first_v = first_v->prev;
29605 default_version_info = first_v;
29606 while (default_version_info != NULL)
29608 if (is_function_default_version
29609 (default_version_info->this_node->symbol.decl))
29611 default_version_info = default_version_info->next;
29614 /* If there is no default node, just return NULL. */
29615 if (default_version_info == NULL)
29618 /* Make default info the first node. */
29619 if (first_v != default_version_info)
29621 default_version_info->prev->next = default_version_info->next;
29622 if (default_version_info->next)
29623 default_version_info->next->prev = default_version_info->prev;
29624 first_v->prev = default_version_info;
29625 default_version_info->next = first_v;
29626 default_version_info->prev = NULL;
29629 default_node = default_version_info->this_node;
29631 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29632 /* Right now, the dispatching is done via ifunc. */
29633 dispatch_decl = make_dispatcher_decl (default_node->symbol.decl);
29635 dispatcher_node = cgraph_get_create_node (dispatch_decl);
29636 gcc_assert (dispatcher_node != NULL);
29637 dispatcher_node->dispatcher_function = 1;
29638 dispatcher_version_info
29639 = insert_new_cgraph_node_version (dispatcher_node);
29640 dispatcher_version_info->next = default_version_info;
29641 dispatcher_node->local.finalized = 1;
29643 /* Set the dispatcher for all the versions. */
29644 it_v = default_version_info;
29645 while (it_v != NULL)
29647 it_v->dispatcher_resolver = dispatch_decl;
29651 error_at (DECL_SOURCE_LOCATION (default_node->symbol.decl),
29652 "multiversioning needs ifunc which is not supported "
29653 "in this configuration");
29655 return dispatch_decl;
29658 /* Makes a function attribute of the form NAME(ARG_NAME) and chains it to CHAIN. */
29662 make_attribute (const char *name, const char *arg_name, tree chain)
29665 tree attr_arg_name;
29669 attr_name = get_identifier (name);
29670 attr_arg_name = build_string (strlen (arg_name), arg_name);
29671 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
29672 attr = tree_cons (attr_name, attr_args, chain);
29676 /* Make the resolver function decl to dispatch the versions of
29677 a multi-versioned function, DEFAULT_DECL. Create an
29678 empty basic block in the resolver and store the pointer in
29679 EMPTY_BB. Return the decl of the resolver function. */
29682 make_resolver_func (const tree default_decl,
29683 const tree dispatch_decl,
29684 basic_block *empty_bb)
29686 char *resolver_name;
29687 tree decl, type, decl_name, t;
29688 bool is_uniq = false;
29690 /* IFUNCs have to be globally visible. So, if the default_decl is
29691 not, then the name of the IFUNC should be made unique. */
29692 if (TREE_PUBLIC (default_decl) == 0)
29695 /* Append the filename to the resolver function if the versions are
29696 not externally visible. This is because the resolver function has
29697 to be externally visible for the loader to find it. So, appending
29698 the filename will prevent conflicts with a resolver function from
29699 another module which is based on the same version name. */
29700 resolver_name = make_name (default_decl, "resolver", is_uniq);
29702 /* The resolver function should return a (void *). */
29703 type = build_function_type_list (ptr_type_node, NULL_TREE);
29705 decl = build_fn_decl (resolver_name, type);
29706 decl_name = get_identifier (resolver_name);
29707 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
29709 DECL_NAME (decl) = decl_name;
29710 TREE_USED (decl) = 1;
29711 DECL_ARTIFICIAL (decl) = 1;
29712 DECL_IGNORED_P (decl) = 0;
29713 /* IFUNC resolvers have to be externally visible. */
29714 TREE_PUBLIC (decl) = 1;
29715 DECL_UNINLINABLE (decl) = 0;
29717 /* Resolver is not external, body is generated. */
29718 DECL_EXTERNAL (decl) = 0;
29719 DECL_EXTERNAL (dispatch_decl) = 0;
29721 DECL_CONTEXT (decl) = NULL_TREE;
29722 DECL_INITIAL (decl) = make_node (BLOCK);
29723 DECL_STATIC_CONSTRUCTOR (decl) = 0;
29725 if (DECL_COMDAT_GROUP (default_decl)
29726 || TREE_PUBLIC (default_decl))
29728 /* In this case, each translation unit with a call to this
29729 versioned function will put out a resolver. Ensure it
29730 is comdat to keep just one copy. */
29731 DECL_COMDAT (decl) = 1;
29732 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
29734 /* Build result decl and add to function_decl. */
29735 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
29736 DECL_ARTIFICIAL (t) = 1;
29737 DECL_IGNORED_P (t) = 1;
29738 DECL_RESULT (decl) = t;
29740 gimplify_function_tree (decl);
29741 push_cfun (DECL_STRUCT_FUNCTION (decl));
29742 *empty_bb = init_lowered_empty_function (decl, false);
29744 cgraph_add_new_function (decl, true);
29745 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
29749 gcc_assert (dispatch_decl != NULL);
29750 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
29751 DECL_ATTRIBUTES (dispatch_decl)
29752 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
29754 /* Create the alias for dispatch to resolver here. */
29755 /*cgraph_create_function_alias (dispatch_decl, decl);*/
29756 cgraph_same_body_alias (NULL, dispatch_decl, decl);
29757 XDELETEVEC (resolver_name);
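/* Conceptually (an illustrative sketch of the pieces created here and
   in make_dispatcher_decl), for a public versioned function foo:

     foo           is declared ifunc ("foo.resolver"), and
     foo.resolver  is a comdat function returning a void pointer,

   so the dynamic loader runs the resolver once and binds foo to the
   version it selects.  */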
29761 /* Generate the dispatching code body to dispatch multi-versioned function
29762 DECL. The target hook is called to process the "target" attributes and
29763 provide the code to dispatch the right function at run-time. NODE points
29764 to the dispatcher decl whose body will be created. */
29767 ix86_generate_version_dispatcher_body (void *node_p)
29769 tree resolver_decl;
29770 basic_block empty_bb;
29771 vec<tree> fn_ver_vec = vNULL;
29772 tree default_ver_decl;
29773 struct cgraph_node *versn;
29774 struct cgraph_node *node;
29776 struct cgraph_function_version_info *node_version_info = NULL;
29777 struct cgraph_function_version_info *versn_info = NULL;
29779 node = (cgraph_node *)node_p;
29781 node_version_info = get_cgraph_node_version (node);
29782 gcc_assert (node->dispatcher_function
29783 && node_version_info != NULL);
29785 if (node_version_info->dispatcher_resolver)
29786 return node_version_info->dispatcher_resolver;
29788 /* The first version in the chain corresponds to the default version. */
29789 default_ver_decl = node_version_info->next->this_node->symbol.decl;
29791 /* node is going to be an alias, so remove the finalized bit. */
29792 node->local.finalized = false;
29794 resolver_decl = make_resolver_func (default_ver_decl,
29795 node->symbol.decl, &empty_bb);
29797 node_version_info->dispatcher_resolver = resolver_decl;
29799 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
29801 fn_ver_vec.create (2);
29803 for (versn_info = node_version_info->next; versn_info;
29804 versn_info = versn_info->next)
29806 versn = versn_info->this_node;
29807 /* Check for virtual functions here again, as by this time it should
29808 have been determined if this function needs a vtable index or
29809 not. This happens for methods in derived classes that override
29810 virtual methods in base classes but are not explicitly marked as virtual. */
29812 if (DECL_VINDEX (versn->symbol.decl))
29813 sorry ("Virtual function multiversioning not supported");
29815 fn_ver_vec.safe_push (versn->symbol.decl);
29818 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
29819 fn_ver_vec.release ();
29820 rebuild_cgraph_edges ();
29822 return resolver_decl;
29824 /* This builds the processor_model struct type defined in
29825 libgcc/config/i386/cpuinfo.c */
29828 build_processor_model_struct (void)
29830 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
29832 tree field = NULL_TREE, field_chain = NULL_TREE;
29834 tree type = make_node (RECORD_TYPE);
29836 /* The first 3 fields are unsigned int. */
29837 for (i = 0; i < 3; ++i)
29839 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
29840 get_identifier (field_name[i]), unsigned_type_node);
29841 if (field_chain != NULL_TREE)
29842 DECL_CHAIN (field) = field_chain;
29843 field_chain = field;
29846 /* The last field is an array of unsigned integers of size one. */
29847 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
29848 get_identifier (field_name[3]),
29849 build_array_type (unsigned_type_node,
29850 build_index_type (size_one_node)));
29851 if (field_chain != NULL_TREE)
29852 DECL_CHAIN (field) = field_chain;
29853 field_chain = field;
29855 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
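/* The record built above mirrors the definition in
   libgcc/config/i386/cpuinfo.c:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */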
29859 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
29862 make_var_decl (tree type, const char *name)
29866 new_decl = build_decl (UNKNOWN_LOCATION,
29868 get_identifier(name),
29871 DECL_EXTERNAL (new_decl) = 1;
29872 TREE_STATIC (new_decl) = 1;
29873 TREE_PUBLIC (new_decl) = 1;
29874 DECL_INITIAL (new_decl) = 0;
29875 DECL_ARTIFICIAL (new_decl) = 0;
29876 DECL_PRESERVE_P (new_decl) = 1;
29878 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
29879 assemble_variable (new_decl, 0, 0, 0);
29884 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
29885 into an integer test against the __cpu_model data defined in libgcc/config/i386/cpuinfo.c. */
29888 fold_builtin_cpu (tree fndecl, tree *args)
29891 enum ix86_builtins fn_code = (enum ix86_builtins)
29892 DECL_FUNCTION_CODE (fndecl);
29893 tree param_string_cst = NULL;
29895 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
29896 enum processor_features
29912 /* These are the values for vendor types and cpu types and subtypes
29913 in cpuinfo.c. CPU types and subtypes must have the corresponding
29914 start value subtracted before use. */
29915 enum processor_model
29926 M_CPU_SUBTYPE_START,
29927 M_INTEL_COREI7_NEHALEM,
29928 M_INTEL_COREI7_WESTMERE,
29929 M_INTEL_COREI7_SANDYBRIDGE,
29930 M_AMDFAM10H_BARCELONA,
29931 M_AMDFAM10H_SHANGHAI,
29932 M_AMDFAM10H_ISTANBUL,
29933 M_AMDFAM15H_BDVER1,
29934 M_AMDFAM15H_BDVER2,
29938 static struct _arch_names_table
29940 const char *const name;
29941 const enum processor_model model;
29943 const arch_names_table[] =
29946 {"intel", M_INTEL},
29947 {"atom", M_INTEL_ATOM},
29948 {"slm", M_INTEL_SLM},
29949 {"core2", M_INTEL_CORE2},
29950 {"corei7", M_INTEL_COREI7},
29951 {"nehalem", M_INTEL_COREI7_NEHALEM},
29952 {"westmere", M_INTEL_COREI7_WESTMERE},
29953 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
29954 {"amdfam10h", M_AMDFAM10H},
29955 {"barcelona", M_AMDFAM10H_BARCELONA},
29956 {"shanghai", M_AMDFAM10H_SHANGHAI},
29957 {"istanbul", M_AMDFAM10H_ISTANBUL},
29958 {"amdfam15h", M_AMDFAM15H},
29959 {"bdver1", M_AMDFAM15H_BDVER1},
29960 {"bdver2", M_AMDFAM15H_BDVER2},
29961 {"bdver3", M_AMDFAM15H_BDVER3},
29964 static struct _isa_names_table
29966 const char *const name;
29967 const enum processor_features feature;
29969 const isa_names_table[] =
29973 {"popcnt", F_POPCNT},
29977 {"ssse3", F_SSSE3},
29978 {"sse4.1", F_SSE4_1},
29979 {"sse4.2", F_SSE4_2},
29984 tree __processor_model_type = build_processor_model_struct ();
29985 tree __cpu_model_var = make_var_decl (__processor_model_type,
29988 gcc_assert ((args != NULL) && (*args != NULL));
29990 param_string_cst = *args;
29991 while (param_string_cst
29992 && TREE_CODE (param_string_cst) != STRING_CST)
29994 /* *args must be an expr that can contain other EXPRs leading to a STRING_CST. */
29996 if (!EXPR_P (param_string_cst))
29998 error ("Parameter to builtin must be a string constant or literal");
29999 return integer_zero_node;
30001 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
30004 gcc_assert (param_string_cst);
30006 if (fn_code == IX86_BUILTIN_CPU_IS)
30012 unsigned int field_val = 0;
30013 unsigned int NUM_ARCH_NAMES
30014 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
30016 for (i = 0; i < NUM_ARCH_NAMES; i++)
30017 if (strcmp (arch_names_table[i].name,
30018 TREE_STRING_POINTER (param_string_cst)) == 0)
30021 if (i == NUM_ARCH_NAMES)
30023 error ("Parameter to builtin not valid: %s",
30024 TREE_STRING_POINTER (param_string_cst));
30025 return integer_zero_node;
30028 field = TYPE_FIELDS (__processor_model_type);
30029 field_val = arch_names_table[i].model;
30031 /* CPU types are stored in the next field. */
30032 if (field_val > M_CPU_TYPE_START
30033 && field_val < M_CPU_SUBTYPE_START)
30035 field = DECL_CHAIN (field);
30036 field_val -= M_CPU_TYPE_START;
30039 /* CPU subtypes are stored in the next field. */
30040 if (field_val > M_CPU_SUBTYPE_START)
30042 field = DECL_CHAIN (DECL_CHAIN (field));
30043 field_val -= M_CPU_SUBTYPE_START;
30046 /* Get the appropriate field in __cpu_model. */
30047 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
30050 /* Check the value. */
30051 final = build2 (EQ_EXPR, unsigned_type_node, ref,
30052 build_int_cstu (unsigned_type_node, field_val));
30053 return build1 (CONVERT_EXPR, integer_type_node, final);
30055 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
30062 unsigned int field_val = 0;
30063 unsigned int NUM_ISA_NAMES
30064 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
30066 for (i = 0; i < NUM_ISA_NAMES; i++)
30067 if (strcmp (isa_names_table[i].name,
30068 TREE_STRING_POINTER (param_string_cst)) == 0)
30071 if (i == NUM_ISA_NAMES)
30073 error ("Parameter to builtin not valid: %s",
30074 TREE_STRING_POINTER (param_string_cst));
30075 return integer_zero_node;
30078 field = TYPE_FIELDS (__processor_model_type);
30079 /* Get the last field, which is __cpu_features. */
30080 while (DECL_CHAIN (field))
30081 field = DECL_CHAIN (field);
30083 /* Get the appropriate field: __cpu_model.__cpu_features */
30084 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
30087 /* Access the 0th element of __cpu_features array. */
30088 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
30089 integer_zero_node, NULL_TREE, NULL_TREE);
30091 field_val = (1 << isa_names_table[i].feature);
30092 /* Return __cpu_model.__cpu_features[0] & field_val */
30093 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
30094 build_int_cstu (unsigned_type_node, field_val));
30095 return build1 (CONVERT_EXPR, integer_type_node, final);
30097 gcc_unreachable ();
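/* Illustratively, the folds built above amount to:

     __builtin_cpu_is ("corei7")
       becomes (int) (__cpu_model.__cpu_type
                      == M_INTEL_COREI7 - M_CPU_TYPE_START), and
     __builtin_cpu_supports ("sse4.2")
       becomes (int) (__cpu_model.__cpu_features[0] & (1 << F_SSE4_2)).  */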
30101 ix86_fold_builtin (tree fndecl, int n_args,
30102 tree *args, bool ignore ATTRIBUTE_UNUSED)
30104 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
30106 enum ix86_builtins fn_code = (enum ix86_builtins)
30107 DECL_FUNCTION_CODE (fndecl);
30108 if (fn_code == IX86_BUILTIN_CPU_IS
30109 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
30111 gcc_assert (n_args == 1);
30112 return fold_builtin_cpu (fndecl, args);
30116 #ifdef SUBTARGET_FOLD_BUILTIN
30117 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
30123 /* Make builtins to detect cpu type and features supported. NAME is
30124 the builtin name, CODE is the builtin code, and FTYPE is the function
30125 type of the builtin. */
30128 make_cpu_type_builtin (const char* name, int code,
30129 enum ix86_builtin_func_type ftype, bool is_const)
30134 type = ix86_get_builtin_func_type (ftype);
30135 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30137 gcc_assert (decl != NULL_TREE);
30138 ix86_builtins[(int) code] = decl;
30139 TREE_READONLY (decl) = is_const;
30142 /* Make builtins to get CPU type and features supported. The created
30145 __builtin_cpu_init (), to detect cpu type and features,
30146 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
30147 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
30151 ix86_init_platform_type_builtins (void)
30153 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
30154 INT_FTYPE_VOID, false);
30155 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
30156 INT_FTYPE_PCCHAR, true);
30157 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
30158 INT_FTYPE_PCCHAR, true);
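/* Usage sketch (hypothetical user code):

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("corei7"))
         return 2;
       if (__builtin_cpu_supports ("popcnt"))
         return 1;
       return 0;
     }  */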
30161 /* Internal method for ix86_init_builtins. */
30164 ix86_init_builtins_va_builtins_abi (void)
30166 tree ms_va_ref, sysv_va_ref;
30167 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
30168 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
30169 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
30170 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
30174 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
30175 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
30176 ms_va_ref = build_reference_type (ms_va_list_type_node);
30177 sysv_va_ref =
30178 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
30180 fnvoid_va_end_ms =
30181 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
30182 fnvoid_va_start_ms =
30183 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
30184 fnvoid_va_end_sysv =
30185 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
30186 fnvoid_va_start_sysv =
30187 build_varargs_function_type_list (void_type_node, sysv_va_ref,
30188 NULL_TREE);
30189 fnvoid_va_copy_ms =
30190 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
30191 NULL_TREE);
30192 fnvoid_va_copy_sysv =
30193 build_function_type_list (void_type_node, sysv_va_ref,
30194 sysv_va_ref, NULL_TREE);
30196 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
30197 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
30198 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
30199 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
30200 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
30201 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
30202 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
30203 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
30204 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
30205 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
30206 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
30207 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
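/* Illustrative example (x86-64 only): with the builtins registered above,
   a SYSV-compiled translation unit can define an MS-ABI varargs function:

     int __attribute__ ((ms_abi))
     msum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/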
30211 ix86_init_builtin_types (void)
30213 tree float128_type_node, float80_type_node;
30215 /* The __float80 type. */
30216 float80_type_node = long_double_type_node;
30217 if (TYPE_MODE (float80_type_node) != XFmode)
30219 /* long double is not XFmode here; build a distinct 80-bit type.  */
30220 float80_type_node = make_node (REAL_TYPE);
30222 TYPE_PRECISION (float80_type_node) = 80;
30223 layout_type (float80_type_node);
30225 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
30227 /* The __float128 type. */
30228 float128_type_node = make_node (REAL_TYPE);
30229 TYPE_PRECISION (float128_type_node) = 128;
30230 layout_type (float128_type_node);
30231 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
30233 /* This macro is built by i386-builtin-types.awk. */
30234 DEFINE_BUILTIN_PRIMITIVE_TYPES;
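/* Example (illustrative): the two types registered above are usable
   directly from C; the 'w' and 'q' constant suffixes select XFmode and
   TFmode respectively:

     __float80  w = 1.5w;
     __float128 q = 1.5q;
*/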
30238 ix86_init_builtins (void)
30242 ix86_init_builtin_types ();
30244 /* Builtins to get CPU type and features. */
30245 ix86_init_platform_type_builtins ();
30247 /* TFmode support builtins. */
30248 def_builtin_const (0, "__builtin_infq",
30249 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
30250 def_builtin_const (0, "__builtin_huge_valq",
30251 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
30253 /* We will expand them to a normal call if SSE isn't available, since
30254 they are used by libgcc.  */
30255 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
30256 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
30257 BUILT_IN_MD, "__fabstf2", NULL_TREE);
30258 TREE_READONLY (t) = 1;
30259 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
30261 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
30262 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
30263 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
30264 TREE_READONLY (t) = 1;
30265 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
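/* Example (illustrative): with SSE these expand inline to logical ops on
   the TFmode value; without SSE they become calls to the libgcc routines
   named above:

     __float128 y = __builtin_copysignq (__builtin_fabsq (x), -1.0q);
*/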
30267 ix86_init_tm_builtins ();
30268 ix86_init_mmx_sse_builtins ();
30271 ix86_init_builtins_va_builtins_abi ();
30273 #ifdef SUBTARGET_INIT_BUILTINS
30274 SUBTARGET_INIT_BUILTINS;
30278 /* Return the ix86 builtin for CODE. */
30281 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
30283 if (code >= IX86_BUILTIN_MAX)
30284 return error_mark_node;
30286 return ix86_builtins[code];
30289 /* Errors in the source file can cause expand_expr to return const0_rtx
30290 where we expect a vector. To avoid crashing, use one of the vector
30291 clear instructions. */
30293 safe_vector_operand (rtx x, enum machine_mode mode)
30295 if (x == const0_rtx)
30296 x = CONST0_RTX (mode);
30300 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
30303 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
30306 tree arg0 = CALL_EXPR_ARG (exp, 0);
30307 tree arg1 = CALL_EXPR_ARG (exp, 1);
30308 rtx op0 = expand_normal (arg0);
30309 rtx op1 = expand_normal (arg1);
30310 enum machine_mode tmode = insn_data[icode].operand[0].mode;
30311 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
30312 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
30314 if (VECTOR_MODE_P (mode0))
30315 op0 = safe_vector_operand (op0, mode0);
30316 if (VECTOR_MODE_P (mode1))
30317 op1 = safe_vector_operand (op1, mode1);
30319 if (optimize || !target
30320 || GET_MODE (target) != tmode
30321 || !insn_data[icode].operand[0].predicate (target, tmode))
30322 target = gen_reg_rtx (tmode);
30324 if (GET_MODE (op1) == SImode && mode1 == TImode)
30326 rtx x = gen_reg_rtx (V4SImode);
30327 emit_insn (gen_sse2_loadd (x, op1));
30328 op1 = gen_lowpart (TImode, x);
30331 if (!insn_data[icode].operand[1].predicate (op0, mode0))
30332 op0 = copy_to_mode_reg (mode0, op0);
30333 if (!insn_data[icode].operand[2].predicate (op1, mode1))
30334 op1 = copy_to_mode_reg (mode1, op1);
30336 pat = GEN_FCN (icode) (target, op0, op1);
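/* For reference (illustrative): plain two-operand intrinsics such as
   _mm_add_epi16 (__builtin_ia32_paddw128, V8HI_FTYPE_V8HI_V8HI) reach this
   helper from ix86_expand_args_builtin; the SImode/TImode case above loads
   a 32-bit integer through sse2_loadd when the insn wants a TImode
   operand.  */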
30345 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
30348 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
30349 enum ix86_builtin_func_type m_type,
30350 enum rtx_code sub_code)
30355 bool comparison_p = false;
30357 bool last_arg_constant = false;
30358 int num_memory = 0;
30361 enum machine_mode mode;
30364 enum machine_mode tmode = insn_data[icode].operand[0].mode;
30368 case MULTI_ARG_4_DF2_DI_I:
30369 case MULTI_ARG_4_DF2_DI_I1:
30370 case MULTI_ARG_4_SF2_SI_I:
30371 case MULTI_ARG_4_SF2_SI_I1:
30373 last_arg_constant = true;
30376 case MULTI_ARG_3_SF:
30377 case MULTI_ARG_3_DF:
30378 case MULTI_ARG_3_SF2:
30379 case MULTI_ARG_3_DF2:
30380 case MULTI_ARG_3_DI:
30381 case MULTI_ARG_3_SI:
30382 case MULTI_ARG_3_SI_DI:
30383 case MULTI_ARG_3_HI:
30384 case MULTI_ARG_3_HI_SI:
30385 case MULTI_ARG_3_QI:
30386 case MULTI_ARG_3_DI2:
30387 case MULTI_ARG_3_SI2:
30388 case MULTI_ARG_3_HI2:
30389 case MULTI_ARG_3_QI2:
30393 case MULTI_ARG_2_SF:
30394 case MULTI_ARG_2_DF:
30395 case MULTI_ARG_2_DI:
30396 case MULTI_ARG_2_SI:
30397 case MULTI_ARG_2_HI:
30398 case MULTI_ARG_2_QI:
30402 case MULTI_ARG_2_DI_IMM:
30403 case MULTI_ARG_2_SI_IMM:
30404 case MULTI_ARG_2_HI_IMM:
30405 case MULTI_ARG_2_QI_IMM:
30407 last_arg_constant = true;
30410 case MULTI_ARG_1_SF:
30411 case MULTI_ARG_1_DF:
30412 case MULTI_ARG_1_SF2:
30413 case MULTI_ARG_1_DF2:
30414 case MULTI_ARG_1_DI:
30415 case MULTI_ARG_1_SI:
30416 case MULTI_ARG_1_HI:
30417 case MULTI_ARG_1_QI:
30418 case MULTI_ARG_1_SI_DI:
30419 case MULTI_ARG_1_HI_DI:
30420 case MULTI_ARG_1_HI_SI:
30421 case MULTI_ARG_1_QI_DI:
30422 case MULTI_ARG_1_QI_SI:
30423 case MULTI_ARG_1_QI_HI:
30427 case MULTI_ARG_2_DI_CMP:
30428 case MULTI_ARG_2_SI_CMP:
30429 case MULTI_ARG_2_HI_CMP:
30430 case MULTI_ARG_2_QI_CMP:
30432 comparison_p = true;
30435 case MULTI_ARG_2_SF_TF:
30436 case MULTI_ARG_2_DF_TF:
30437 case MULTI_ARG_2_DI_TF:
30438 case MULTI_ARG_2_SI_TF:
30439 case MULTI_ARG_2_HI_TF:
30440 case MULTI_ARG_2_QI_TF:
30446 gcc_unreachable ();
30449 if (optimize || !target
30450 || GET_MODE (target) != tmode
30451 || !insn_data[icode].operand[0].predicate (target, tmode))
30452 target = gen_reg_rtx (tmode);
30454 gcc_assert (nargs <= 4);
30456 for (i = 0; i < nargs; i++)
30458 tree arg = CALL_EXPR_ARG (exp, i);
30459 rtx op = expand_normal (arg);
30460 int adjust = (comparison_p) ? 1 : 0;
30461 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
30463 if (last_arg_constant && i == nargs - 1)
30465 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
30467 enum insn_code new_icode = icode;
30470 case CODE_FOR_xop_vpermil2v2df3:
30471 case CODE_FOR_xop_vpermil2v4sf3:
30472 case CODE_FOR_xop_vpermil2v4df3:
30473 case CODE_FOR_xop_vpermil2v8sf3:
30474 error ("the last argument must be a 2-bit immediate");
30475 return gen_reg_rtx (tmode);
30476 case CODE_FOR_xop_rotlv2di3:
30477 new_icode = CODE_FOR_rotlv2di3;
30479 case CODE_FOR_xop_rotlv4si3:
30480 new_icode = CODE_FOR_rotlv4si3;
30482 case CODE_FOR_xop_rotlv8hi3:
30483 new_icode = CODE_FOR_rotlv8hi3;
30485 case CODE_FOR_xop_rotlv16qi3:
30486 new_icode = CODE_FOR_rotlv16qi3;
30488 if (CONST_INT_P (op))
30490 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
30491 op = GEN_INT (INTVAL (op) & mask);
30492 gcc_checking_assert
30493 (insn_data[icode].operand[i + 1].predicate (op, mode));
30497 gcc_checking_assert
30499 && insn_data[new_icode].operand[0].mode == tmode
30500 && insn_data[new_icode].operand[1].mode == tmode
30501 && insn_data[new_icode].operand[2].mode == mode
30502 && insn_data[new_icode].operand[0].predicate
30503 == insn_data[icode].operand[0].predicate
30504 && insn_data[new_icode].operand[1].predicate
30505 == insn_data[icode].operand[1].predicate);
30511 gcc_unreachable ();
30518 if (VECTOR_MODE_P (mode))
30519 op = safe_vector_operand (op, mode);
30521 /* If we aren't optimizing, only allow one memory operand to be
30522 generated.  */
30523 if (memory_operand (op, mode))
30526 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
30528 if (optimize
30529 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
30530 || num_memory > 1)
30531 op = force_reg (mode, op);
30535 args[i].mode = mode;
30541 pat = GEN_FCN (icode) (target, args[0].op);
30546 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
30547 GEN_INT ((int)sub_code));
30548 else if (! comparison_p)
30549 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
30552 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
30556 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
30561 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
30565 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
30569 gcc_unreachable ();
30579 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
30580 insns with vec_merge. */
30583 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
30587 tree arg0 = CALL_EXPR_ARG (exp, 0);
30588 rtx op1, op0 = expand_normal (arg0);
30589 enum machine_mode tmode = insn_data[icode].operand[0].mode;
30590 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
30592 if (optimize || !target
30593 || GET_MODE (target) != tmode
30594 || !insn_data[icode].operand[0].predicate (target, tmode))
30595 target = gen_reg_rtx (tmode);
30597 if (VECTOR_MODE_P (mode0))
30598 op0 = safe_vector_operand (op0, mode0);
30600 if ((optimize && !register_operand (op0, mode0))
30601 || !insn_data[icode].operand[1].predicate (op0, mode0))
30602 op0 = copy_to_mode_reg (mode0, op0);
30605 if (!insn_data[icode].operand[2].predicate (op1, mode0))
30606 op1 = copy_to_mode_reg (mode0, op1);
30608 pat = GEN_FCN (icode) (target, op0, op1);
30615 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
30618 ix86_expand_sse_compare (const struct builtin_description *d,
30619 tree exp, rtx target, bool swap)
30622 tree arg0 = CALL_EXPR_ARG (exp, 0);
30623 tree arg1 = CALL_EXPR_ARG (exp, 1);
30624 rtx op0 = expand_normal (arg0);
30625 rtx op1 = expand_normal (arg1);
30627 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
30628 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
30629 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
30630 enum rtx_code comparison = d->comparison;
30632 if (VECTOR_MODE_P (mode0))
30633 op0 = safe_vector_operand (op0, mode0);
30634 if (VECTOR_MODE_P (mode1))
30635 op1 = safe_vector_operand (op1, mode1);
30637 /* Swap operands if we have a comparison that isn't available in
30638 SSE.  */
30641 rtx tmp = gen_reg_rtx (mode1);
30642 emit_move_insn (tmp, op1);
30647 if (optimize || !target
30648 || GET_MODE (target) != tmode
30649 || !insn_data[d->icode].operand[0].predicate (target, tmode))
30650 target = gen_reg_rtx (tmode);
30652 if ((optimize && !register_operand (op0, mode0))
30653 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
30654 op0 = copy_to_mode_reg (mode0, op0);
30655 if ((optimize && !register_operand (op1, mode1))
30656 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
30657 op1 = copy_to_mode_reg (mode1, op1);
30659 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
30660 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
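/* For example (illustrative): SSE has no direct GT comparison, so builtins
   such as __builtin_ia32_cmpgtss carry a _SWAP function type and are
   expanded here as the mirrored LT test with op0 and op1 exchanged.  */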
30667 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
30670 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
30674 tree arg0 = CALL_EXPR_ARG (exp, 0);
30675 tree arg1 = CALL_EXPR_ARG (exp, 1);
30676 rtx op0 = expand_normal (arg0);
30677 rtx op1 = expand_normal (arg1);
30678 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
30679 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
30680 enum rtx_code comparison = d->comparison;
30682 if (VECTOR_MODE_P (mode0))
30683 op0 = safe_vector_operand (op0, mode0);
30684 if (VECTOR_MODE_P (mode1))
30685 op1 = safe_vector_operand (op1, mode1);
30687 /* Swap operands if we have a comparison that isn't available in
30688 SSE.  */
30689 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
30696 target = gen_reg_rtx (SImode);
30697 emit_move_insn (target, const0_rtx);
30698 target = gen_rtx_SUBREG (QImode, target, 0);
30700 if ((optimize && !register_operand (op0, mode0))
30701 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
30702 op0 = copy_to_mode_reg (mode0, op0);
30703 if ((optimize && !register_operand (op1, mode1))
30704 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
30705 op1 = copy_to_mode_reg (mode1, op1);
30707 pat = GEN_FCN (d->icode) (op0, op1);
30711 emit_insn (gen_rtx_SET (VOIDmode,
30712 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
30713 gen_rtx_fmt_ee (comparison, QImode,
30717 return SUBREG_REG (target);
30720 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
30723 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
30727 tree arg0 = CALL_EXPR_ARG (exp, 0);
30728 rtx op1, op0 = expand_normal (arg0);
30729 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
30730 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
30732 if (optimize || target == 0
30733 || GET_MODE (target) != tmode
30734 || !insn_data[d->icode].operand[0].predicate (target, tmode))
30735 target = gen_reg_rtx (tmode);
30737 if (VECTOR_MODE_P (mode0))
30738 op0 = safe_vector_operand (op0, mode0);
30740 if ((optimize && !register_operand (op0, mode0))
30741 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
30742 op0 = copy_to_mode_reg (mode0, op0);
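/* For the round builtins, D's comparison field carries the rounding-mode
   immediate (e.g. ROUND_FLOOR) rather than an rtx comparison code.  */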
30744 op1 = GEN_INT (d->comparison);
30746 pat = GEN_FCN (d->icode) (target, op0, op1);
30754 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
30755 tree exp, rtx target)
30758 tree arg0 = CALL_EXPR_ARG (exp, 0);
30759 tree arg1 = CALL_EXPR_ARG (exp, 1);
30760 rtx op0 = expand_normal (arg0);
30761 rtx op1 = expand_normal (arg1);
30763 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
30764 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
30765 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
30767 if (optimize || target == 0
30768 || GET_MODE (target) != tmode
30769 || !insn_data[d->icode].operand[0].predicate (target, tmode))
30770 target = gen_reg_rtx (tmode);
30772 op0 = safe_vector_operand (op0, mode0);
30773 op1 = safe_vector_operand (op1, mode1);
30775 if ((optimize && !register_operand (op0, mode0))
30776 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
30777 op0 = copy_to_mode_reg (mode0, op0);
30778 if ((optimize && !register_operand (op1, mode1))
30779 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
30780 op1 = copy_to_mode_reg (mode1, op1);
30782 op2 = GEN_INT (d->comparison);
30784 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
30791 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
30794 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
30798 tree arg0 = CALL_EXPR_ARG (exp, 0);
30799 tree arg1 = CALL_EXPR_ARG (exp, 1);
30800 rtx op0 = expand_normal (arg0);
30801 rtx op1 = expand_normal (arg1);
30802 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
30803 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
30804 enum rtx_code comparison = d->comparison;
30806 if (VECTOR_MODE_P (mode0))
30807 op0 = safe_vector_operand (op0, mode0);
30808 if (VECTOR_MODE_P (mode1))
30809 op1 = safe_vector_operand (op1, mode1);
30811 target = gen_reg_rtx (SImode);
30812 emit_move_insn (target, const0_rtx);
30813 target = gen_rtx_SUBREG (QImode, target, 0);
30815 if ((optimize && !register_operand (op0, mode0))
30816 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
30817 op0 = copy_to_mode_reg (mode0, op0);
30818 if ((optimize && !register_operand (op1, mode1))
30819 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
30820 op1 = copy_to_mode_reg (mode1, op1);
30822 pat = GEN_FCN (d->icode) (op0, op1);
30826 emit_insn (gen_rtx_SET (VOIDmode,
30827 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
30828 gen_rtx_fmt_ee (comparison, QImode,
30832 return SUBREG_REG (target);
30835 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
30838 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
30839 tree exp, rtx target)
30842 tree arg0 = CALL_EXPR_ARG (exp, 0);
30843 tree arg1 = CALL_EXPR_ARG (exp, 1);
30844 tree arg2 = CALL_EXPR_ARG (exp, 2);
30845 tree arg3 = CALL_EXPR_ARG (exp, 3);
30846 tree arg4 = CALL_EXPR_ARG (exp, 4);
30847 rtx scratch0, scratch1;
30848 rtx op0 = expand_normal (arg0);
30849 rtx op1 = expand_normal (arg1);
30850 rtx op2 = expand_normal (arg2);
30851 rtx op3 = expand_normal (arg3);
30852 rtx op4 = expand_normal (arg4);
30853 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
30855 tmode0 = insn_data[d->icode].operand[0].mode;
30856 tmode1 = insn_data[d->icode].operand[1].mode;
30857 modev2 = insn_data[d->icode].operand[2].mode;
30858 modei3 = insn_data[d->icode].operand[3].mode;
30859 modev4 = insn_data[d->icode].operand[4].mode;
30860 modei5 = insn_data[d->icode].operand[5].mode;
30861 modeimm = insn_data[d->icode].operand[6].mode;
30863 if (VECTOR_MODE_P (modev2))
30864 op0 = safe_vector_operand (op0, modev2);
30865 if (VECTOR_MODE_P (modev4))
30866 op2 = safe_vector_operand (op2, modev4);
30868 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
30869 op0 = copy_to_mode_reg (modev2, op0);
30870 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
30871 op1 = copy_to_mode_reg (modei3, op1);
30872 if ((optimize && !register_operand (op2, modev4))
30873 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
30874 op2 = copy_to_mode_reg (modev4, op2);
30875 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
30876 op3 = copy_to_mode_reg (modei5, op3);
30878 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
30880 error ("the fifth argument must be an 8-bit immediate");
30884 if (d->code == IX86_BUILTIN_PCMPESTRI128)
30886 if (optimize || !target
30887 || GET_MODE (target) != tmode0
30888 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
30889 target = gen_reg_rtx (tmode0);
30891 scratch1 = gen_reg_rtx (tmode1);
30893 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
30895 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
30897 if (optimize || !target
30898 || GET_MODE (target) != tmode1
30899 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
30900 target = gen_reg_rtx (tmode1);
30902 scratch0 = gen_reg_rtx (tmode0);
30904 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
30908 gcc_assert (d->flag);
30910 scratch0 = gen_reg_rtx (tmode0);
30911 scratch1 = gen_reg_rtx (tmode1);
30913 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
30923 target = gen_reg_rtx (SImode);
30924 emit_move_insn (target, const0_rtx);
30925 target = gen_rtx_SUBREG (QImode, target, 0);
30928 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
30929 gen_rtx_fmt_ee (EQ, QImode,
30930 gen_rtx_REG ((enum machine_mode) d->flag,
30933 return SUBREG_REG (target);
30940 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
30943 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
30944 tree exp, rtx target)
30947 tree arg0 = CALL_EXPR_ARG (exp, 0);
30948 tree arg1 = CALL_EXPR_ARG (exp, 1);
30949 tree arg2 = CALL_EXPR_ARG (exp, 2);
30950 rtx scratch0, scratch1;
30951 rtx op0 = expand_normal (arg0);
30952 rtx op1 = expand_normal (arg1);
30953 rtx op2 = expand_normal (arg2);
30954 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
30956 tmode0 = insn_data[d->icode].operand[0].mode;
30957 tmode1 = insn_data[d->icode].operand[1].mode;
30958 modev2 = insn_data[d->icode].operand[2].mode;
30959 modev3 = insn_data[d->icode].operand[3].mode;
30960 modeimm = insn_data[d->icode].operand[4].mode;
30962 if (VECTOR_MODE_P (modev2))
30963 op0 = safe_vector_operand (op0, modev2);
30964 if (VECTOR_MODE_P (modev3))
30965 op1 = safe_vector_operand (op1, modev3);
30967 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
30968 op0 = copy_to_mode_reg (modev2, op0);
30969 if ((optimize && !register_operand (op1, modev3))
30970 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
30971 op1 = copy_to_mode_reg (modev3, op1);
30973 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
30975 error ("the third argument must be an 8-bit immediate");
30979 if (d->code == IX86_BUILTIN_PCMPISTRI128)
30981 if (optimize || !target
30982 || GET_MODE (target) != tmode0
30983 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
30984 target = gen_reg_rtx (tmode0);
30986 scratch1 = gen_reg_rtx (tmode1);
30988 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
30990 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
30992 if (optimize || !target
30993 || GET_MODE (target) != tmode1
30994 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
30995 target = gen_reg_rtx (tmode1);
30997 scratch0 = gen_reg_rtx (tmode0);
30999 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
31003 gcc_assert (d->flag);
31005 scratch0 = gen_reg_rtx (tmode0);
31006 scratch1 = gen_reg_rtx (tmode1);
31008 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
31018 target = gen_reg_rtx (SImode);
31019 emit_move_insn (target, const0_rtx);
31020 target = gen_rtx_SUBREG (QImode, target, 0);
31023 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
31024 gen_rtx_fmt_ee (EQ, QImode,
31025 gen_rtx_REG ((enum machine_mode) d->flag,
31028 return SUBREG_REG (target);
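/* Example (illustrative): _mm_cmpistri (a, b, imm) maps to
   __builtin_ia32_pcmpistri128 and returns the index via TARGET above,
   while the flag variants (_mm_cmpistrz and friends) take the d->flag
   path and read a single bit of FLAGS_REG.  */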
31034 /* Subroutine of ix86_expand_builtin to take care of insns with
31035 variable number of operands. */
31038 ix86_expand_args_builtin (const struct builtin_description *d,
31039 tree exp, rtx target)
31041 rtx pat, real_target;
31042 unsigned int i, nargs;
31043 unsigned int nargs_constant = 0;
31044 int num_memory = 0;
31048 enum machine_mode mode;
31050 bool last_arg_count = false;
31051 enum insn_code icode = d->icode;
31052 const struct insn_data_d *insn_p = &insn_data[icode];
31053 enum machine_mode tmode = insn_p->operand[0].mode;
31054 enum machine_mode rmode = VOIDmode;
31056 enum rtx_code comparison = d->comparison;
31058 switch ((enum ix86_builtin_func_type) d->flag)
31060 case V2DF_FTYPE_V2DF_ROUND:
31061 case V4DF_FTYPE_V4DF_ROUND:
31062 case V4SF_FTYPE_V4SF_ROUND:
31063 case V8SF_FTYPE_V8SF_ROUND:
31064 case V4SI_FTYPE_V4SF_ROUND:
31065 case V8SI_FTYPE_V8SF_ROUND:
31066 return ix86_expand_sse_round (d, exp, target);
31067 case V4SI_FTYPE_V2DF_V2DF_ROUND:
31068 case V8SI_FTYPE_V4DF_V4DF_ROUND:
31069 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
31070 case INT_FTYPE_V8SF_V8SF_PTEST:
31071 case INT_FTYPE_V4DI_V4DI_PTEST:
31072 case INT_FTYPE_V4DF_V4DF_PTEST:
31073 case INT_FTYPE_V4SF_V4SF_PTEST:
31074 case INT_FTYPE_V2DI_V2DI_PTEST:
31075 case INT_FTYPE_V2DF_V2DF_PTEST:
31076 return ix86_expand_sse_ptest (d, exp, target);
31077 case FLOAT128_FTYPE_FLOAT128:
31078 case FLOAT_FTYPE_FLOAT:
31079 case INT_FTYPE_INT:
31080 case UINT64_FTYPE_INT:
31081 case UINT16_FTYPE_UINT16:
31082 case INT64_FTYPE_INT64:
31083 case INT64_FTYPE_V4SF:
31084 case INT64_FTYPE_V2DF:
31085 case INT_FTYPE_V16QI:
31086 case INT_FTYPE_V8QI:
31087 case INT_FTYPE_V8SF:
31088 case INT_FTYPE_V4DF:
31089 case INT_FTYPE_V4SF:
31090 case INT_FTYPE_V2DF:
31091 case INT_FTYPE_V32QI:
31092 case V16QI_FTYPE_V16QI:
31093 case V8SI_FTYPE_V8SF:
31094 case V8SI_FTYPE_V4SI:
31095 case V8HI_FTYPE_V8HI:
31096 case V8HI_FTYPE_V16QI:
31097 case V8QI_FTYPE_V8QI:
31098 case V8SF_FTYPE_V8SF:
31099 case V8SF_FTYPE_V8SI:
31100 case V8SF_FTYPE_V4SF:
31101 case V8SF_FTYPE_V8HI:
31102 case V4SI_FTYPE_V4SI:
31103 case V4SI_FTYPE_V16QI:
31104 case V4SI_FTYPE_V4SF:
31105 case V4SI_FTYPE_V8SI:
31106 case V4SI_FTYPE_V8HI:
31107 case V4SI_FTYPE_V4DF:
31108 case V4SI_FTYPE_V2DF:
31109 case V4HI_FTYPE_V4HI:
31110 case V4DF_FTYPE_V4DF:
31111 case V4DF_FTYPE_V4SI:
31112 case V4DF_FTYPE_V4SF:
31113 case V4DF_FTYPE_V2DF:
31114 case V4SF_FTYPE_V4SF:
31115 case V4SF_FTYPE_V4SI:
31116 case V4SF_FTYPE_V8SF:
31117 case V4SF_FTYPE_V4DF:
31118 case V4SF_FTYPE_V8HI:
31119 case V4SF_FTYPE_V2DF:
31120 case V2DI_FTYPE_V2DI:
31121 case V2DI_FTYPE_V16QI:
31122 case V2DI_FTYPE_V8HI:
31123 case V2DI_FTYPE_V4SI:
31124 case V2DF_FTYPE_V2DF:
31125 case V2DF_FTYPE_V4SI:
31126 case V2DF_FTYPE_V4DF:
31127 case V2DF_FTYPE_V4SF:
31128 case V2DF_FTYPE_V2SI:
31129 case V2SI_FTYPE_V2SI:
31130 case V2SI_FTYPE_V4SF:
31131 case V2SI_FTYPE_V2SF:
31132 case V2SI_FTYPE_V2DF:
31133 case V2SF_FTYPE_V2SF:
31134 case V2SF_FTYPE_V2SI:
31135 case V32QI_FTYPE_V32QI:
31136 case V32QI_FTYPE_V16QI:
31137 case V16HI_FTYPE_V16HI:
31138 case V16HI_FTYPE_V8HI:
31139 case V8SI_FTYPE_V8SI:
31140 case V16HI_FTYPE_V16QI:
31141 case V8SI_FTYPE_V16QI:
31142 case V4DI_FTYPE_V16QI:
31143 case V8SI_FTYPE_V8HI:
31144 case V4DI_FTYPE_V8HI:
31145 case V4DI_FTYPE_V4SI:
31146 case V4DI_FTYPE_V2DI:
31149 case V4SF_FTYPE_V4SF_VEC_MERGE:
31150 case V2DF_FTYPE_V2DF_VEC_MERGE:
31151 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
31152 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
31153 case V16QI_FTYPE_V16QI_V16QI:
31154 case V16QI_FTYPE_V8HI_V8HI:
31155 case V8QI_FTYPE_V8QI_V8QI:
31156 case V8QI_FTYPE_V4HI_V4HI:
31157 case V8HI_FTYPE_V8HI_V8HI:
31158 case V8HI_FTYPE_V16QI_V16QI:
31159 case V8HI_FTYPE_V4SI_V4SI:
31160 case V8SF_FTYPE_V8SF_V8SF:
31161 case V8SF_FTYPE_V8SF_V8SI:
31162 case V4SI_FTYPE_V4SI_V4SI:
31163 case V4SI_FTYPE_V8HI_V8HI:
31164 case V4SI_FTYPE_V4SF_V4SF:
31165 case V4SI_FTYPE_V2DF_V2DF:
31166 case V4HI_FTYPE_V4HI_V4HI:
31167 case V4HI_FTYPE_V8QI_V8QI:
31168 case V4HI_FTYPE_V2SI_V2SI:
31169 case V4DF_FTYPE_V4DF_V4DF:
31170 case V4DF_FTYPE_V4DF_V4DI:
31171 case V4SF_FTYPE_V4SF_V4SF:
31172 case V4SF_FTYPE_V4SF_V4SI:
31173 case V4SF_FTYPE_V4SF_V2SI:
31174 case V4SF_FTYPE_V4SF_V2DF:
31175 case V4SF_FTYPE_V4SF_DI:
31176 case V4SF_FTYPE_V4SF_SI:
31177 case V2DI_FTYPE_V2DI_V2DI:
31178 case V2DI_FTYPE_V16QI_V16QI:
31179 case V2DI_FTYPE_V4SI_V4SI:
31180 case V2UDI_FTYPE_V4USI_V4USI:
31181 case V2DI_FTYPE_V2DI_V16QI:
31182 case V2DI_FTYPE_V2DF_V2DF:
31183 case V2SI_FTYPE_V2SI_V2SI:
31184 case V2SI_FTYPE_V4HI_V4HI:
31185 case V2SI_FTYPE_V2SF_V2SF:
31186 case V2DF_FTYPE_V2DF_V2DF:
31187 case V2DF_FTYPE_V2DF_V4SF:
31188 case V2DF_FTYPE_V2DF_V2DI:
31189 case V2DF_FTYPE_V2DF_DI:
31190 case V2DF_FTYPE_V2DF_SI:
31191 case V2SF_FTYPE_V2SF_V2SF:
31192 case V1DI_FTYPE_V1DI_V1DI:
31193 case V1DI_FTYPE_V8QI_V8QI:
31194 case V1DI_FTYPE_V2SI_V2SI:
31195 case V32QI_FTYPE_V16HI_V16HI:
31196 case V16HI_FTYPE_V8SI_V8SI:
31197 case V32QI_FTYPE_V32QI_V32QI:
31198 case V16HI_FTYPE_V32QI_V32QI:
31199 case V16HI_FTYPE_V16HI_V16HI:
31200 case V8SI_FTYPE_V4DF_V4DF:
31201 case V8SI_FTYPE_V8SI_V8SI:
31202 case V8SI_FTYPE_V16HI_V16HI:
31203 case V4DI_FTYPE_V4DI_V4DI:
31204 case V4DI_FTYPE_V8SI_V8SI:
31205 case V4UDI_FTYPE_V8USI_V8USI:
31206 if (comparison == UNKNOWN)
31207 return ix86_expand_binop_builtin (icode, exp, target);
31210 case V4SF_FTYPE_V4SF_V4SF_SWAP:
31211 case V2DF_FTYPE_V2DF_V2DF_SWAP:
31212 gcc_assert (comparison != UNKNOWN);
31216 case V16HI_FTYPE_V16HI_V8HI_COUNT:
31217 case V16HI_FTYPE_V16HI_SI_COUNT:
31218 case V8SI_FTYPE_V8SI_V4SI_COUNT:
31219 case V8SI_FTYPE_V8SI_SI_COUNT:
31220 case V4DI_FTYPE_V4DI_V2DI_COUNT:
31221 case V4DI_FTYPE_V4DI_INT_COUNT:
31222 case V8HI_FTYPE_V8HI_V8HI_COUNT:
31223 case V8HI_FTYPE_V8HI_SI_COUNT:
31224 case V4SI_FTYPE_V4SI_V4SI_COUNT:
31225 case V4SI_FTYPE_V4SI_SI_COUNT:
31226 case V4HI_FTYPE_V4HI_V4HI_COUNT:
31227 case V4HI_FTYPE_V4HI_SI_COUNT:
31228 case V2DI_FTYPE_V2DI_V2DI_COUNT:
31229 case V2DI_FTYPE_V2DI_SI_COUNT:
31230 case V2SI_FTYPE_V2SI_V2SI_COUNT:
31231 case V2SI_FTYPE_V2SI_SI_COUNT:
31232 case V1DI_FTYPE_V1DI_V1DI_COUNT:
31233 case V1DI_FTYPE_V1DI_SI_COUNT:
31235 last_arg_count = true;
31237 case UINT64_FTYPE_UINT64_UINT64:
31238 case UINT_FTYPE_UINT_UINT:
31239 case UINT_FTYPE_UINT_USHORT:
31240 case UINT_FTYPE_UINT_UCHAR:
31241 case UINT16_FTYPE_UINT16_INT:
31242 case UINT8_FTYPE_UINT8_INT:
31245 case V2DI_FTYPE_V2DI_INT_CONVERT:
31248 nargs_constant = 1;
31250 case V4DI_FTYPE_V4DI_INT_CONVERT:
31253 nargs_constant = 1;
31255 case V8HI_FTYPE_V8HI_INT:
31256 case V8HI_FTYPE_V8SF_INT:
31257 case V8HI_FTYPE_V4SF_INT:
31258 case V8SF_FTYPE_V8SF_INT:
31259 case V4SI_FTYPE_V4SI_INT:
31260 case V4SI_FTYPE_V8SI_INT:
31261 case V4HI_FTYPE_V4HI_INT:
31262 case V4DF_FTYPE_V4DF_INT:
31263 case V4SF_FTYPE_V4SF_INT:
31264 case V4SF_FTYPE_V8SF_INT:
31265 case V2DI_FTYPE_V2DI_INT:
31266 case V2DF_FTYPE_V2DF_INT:
31267 case V2DF_FTYPE_V4DF_INT:
31268 case V16HI_FTYPE_V16HI_INT:
31269 case V8SI_FTYPE_V8SI_INT:
31270 case V4DI_FTYPE_V4DI_INT:
31271 case V2DI_FTYPE_V4DI_INT:
31273 nargs_constant = 1;
31275 case V16QI_FTYPE_V16QI_V16QI_V16QI:
31276 case V8SF_FTYPE_V8SF_V8SF_V8SF:
31277 case V4DF_FTYPE_V4DF_V4DF_V4DF:
31278 case V4SF_FTYPE_V4SF_V4SF_V4SF:
31279 case V2DF_FTYPE_V2DF_V2DF_V2DF:
31280 case V32QI_FTYPE_V32QI_V32QI_V32QI:
31283 case V32QI_FTYPE_V32QI_V32QI_INT:
31284 case V16HI_FTYPE_V16HI_V16HI_INT:
31285 case V16QI_FTYPE_V16QI_V16QI_INT:
31286 case V4DI_FTYPE_V4DI_V4DI_INT:
31287 case V8HI_FTYPE_V8HI_V8HI_INT:
31288 case V8SI_FTYPE_V8SI_V8SI_INT:
31289 case V8SI_FTYPE_V8SI_V4SI_INT:
31290 case V8SF_FTYPE_V8SF_V8SF_INT:
31291 case V8SF_FTYPE_V8SF_V4SF_INT:
31292 case V4SI_FTYPE_V4SI_V4SI_INT:
31293 case V4DF_FTYPE_V4DF_V4DF_INT:
31294 case V4DF_FTYPE_V4DF_V2DF_INT:
31295 case V4SF_FTYPE_V4SF_V4SF_INT:
31296 case V2DI_FTYPE_V2DI_V2DI_INT:
31297 case V4DI_FTYPE_V4DI_V2DI_INT:
31298 case V2DF_FTYPE_V2DF_V2DF_INT:
31300 nargs_constant = 1;
31302 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
31305 nargs_constant = 1;
31307 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
31310 nargs_constant = 1;
31312 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
31315 nargs_constant = 1;
31317 case V2DI_FTYPE_V2DI_UINT_UINT:
31319 nargs_constant = 2;
31321 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
31322 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
31323 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
31324 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
31326 nargs_constant = 1;
31328 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
31330 nargs_constant = 2;
31332 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
31333 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
31337 gcc_unreachable ();
31340 gcc_assert (nargs <= ARRAY_SIZE (args));
31342 if (comparison != UNKNOWN)
31344 gcc_assert (nargs == 2);
31345 return ix86_expand_sse_compare (d, exp, target, swap);
31348 if (rmode == VOIDmode || rmode == tmode)
31352 || GET_MODE (target) != tmode
31353 || !insn_p->operand[0].predicate (target, tmode))
31354 target = gen_reg_rtx (tmode);
31355 real_target = target;
31359 target = gen_reg_rtx (rmode);
31360 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
31363 for (i = 0; i < nargs; i++)
31365 tree arg = CALL_EXPR_ARG (exp, i);
31366 rtx op = expand_normal (arg);
31367 enum machine_mode mode = insn_p->operand[i + 1].mode;
31368 bool match = insn_p->operand[i + 1].predicate (op, mode);
31370 if (last_arg_count && (i + 1) == nargs)
31372 /* SIMD shift insns take either an 8-bit immediate or a
31373 register as the count.  But builtin functions take an int as
31374 the count.  If the count doesn't match, we put it in a register.  */
31377 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
31378 if (!insn_p->operand[i + 1].predicate (op, mode))
31379 op = copy_to_reg (op);
31382 else if ((nargs - i) <= nargs_constant)
31387 case CODE_FOR_avx2_inserti128:
31388 case CODE_FOR_avx2_extracti128:
31389 error ("the last argument must be an 1-bit immediate");
31392 case CODE_FOR_sse4_1_roundsd:
31393 case CODE_FOR_sse4_1_roundss:
31395 case CODE_FOR_sse4_1_roundpd:
31396 case CODE_FOR_sse4_1_roundps:
31397 case CODE_FOR_avx_roundpd256:
31398 case CODE_FOR_avx_roundps256:
31400 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
31401 case CODE_FOR_sse4_1_roundps_sfix:
31402 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
31403 case CODE_FOR_avx_roundps_sfix256:
31405 case CODE_FOR_sse4_1_blendps:
31406 case CODE_FOR_avx_blendpd256:
31407 case CODE_FOR_avx_vpermilv4df:
31408 error ("the last argument must be a 4-bit immediate");
31411 case CODE_FOR_sse4_1_blendpd:
31412 case CODE_FOR_avx_vpermilv2df:
31413 case CODE_FOR_xop_vpermil2v2df3:
31414 case CODE_FOR_xop_vpermil2v4sf3:
31415 case CODE_FOR_xop_vpermil2v4df3:
31416 case CODE_FOR_xop_vpermil2v8sf3:
31417 error ("the last argument must be a 2-bit immediate");
31420 case CODE_FOR_avx_vextractf128v4df:
31421 case CODE_FOR_avx_vextractf128v8sf:
31422 case CODE_FOR_avx_vextractf128v8si:
31423 case CODE_FOR_avx_vinsertf128v4df:
31424 case CODE_FOR_avx_vinsertf128v8sf:
31425 case CODE_FOR_avx_vinsertf128v8si:
31426 error ("the last argument must be a 1-bit immediate");
31429 case CODE_FOR_avx_vmcmpv2df3:
31430 case CODE_FOR_avx_vmcmpv4sf3:
31431 case CODE_FOR_avx_cmpv2df3:
31432 case CODE_FOR_avx_cmpv4sf3:
31433 case CODE_FOR_avx_cmpv4df3:
31434 case CODE_FOR_avx_cmpv8sf3:
31435 error ("the last argument must be a 5-bit immediate");
31439 switch (nargs_constant)
31442 if ((nargs - i) == nargs_constant)
31444 error ("the next to last argument must be an 8-bit immediate");
31448 error ("the last argument must be an 8-bit immediate");
31451 gcc_unreachable ();
31458 if (VECTOR_MODE_P (mode))
31459 op = safe_vector_operand (op, mode);
31461 /* If we aren't optimizing, only allow one memory operand to
31462 be generated.  */
31463 if (memory_operand (op, mode))
31466 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
31468 if (optimize || !match || num_memory > 1)
31469 op = copy_to_mode_reg (mode, op);
31473 op = copy_to_reg (op);
31474 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
31479 args[i].mode = mode;
31485 pat = GEN_FCN (icode) (real_target, args[0].op);
31488 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
31491 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
31495 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
31496 args[2].op, args[3].op);
31499 gcc_unreachable ();
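/* For instance (illustrative): _mm_slli_epi16 uses
   __builtin_ia32_psllwi128 with function type V8HI_FTYPE_V8HI_SI_COUNT;
   its int count argument takes the last_arg_count path above and is
   forced into a register whenever it is not an immediate the insn's
   predicate accepts.  */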
31509 /* Subroutine of ix86_expand_builtin to take care of special insns
31510 with variable number of operands. */
31513 ix86_expand_special_args_builtin (const struct builtin_description *d,
31514 tree exp, rtx target)
31518 unsigned int i, nargs, arg_adjust, memory;
31522 enum machine_mode mode;
31524 enum insn_code icode = d->icode;
31525 bool last_arg_constant = false;
31526 const struct insn_data_d *insn_p = &insn_data[icode];
31527 enum machine_mode tmode = insn_p->operand[0].mode;
31528 enum { load, store } klass;
31530 switch ((enum ix86_builtin_func_type) d->flag)
31532 case VOID_FTYPE_VOID:
31533 emit_insn (GEN_FCN (icode) (target));
31535 case VOID_FTYPE_UINT64:
31536 case VOID_FTYPE_UNSIGNED:
31542 case INT_FTYPE_VOID:
31543 case UINT64_FTYPE_VOID:
31544 case UNSIGNED_FTYPE_VOID:
31549 case UINT64_FTYPE_PUNSIGNED:
31550 case V2DI_FTYPE_PV2DI:
31551 case V4DI_FTYPE_PV4DI:
31552 case V32QI_FTYPE_PCCHAR:
31553 case V16QI_FTYPE_PCCHAR:
31554 case V8SF_FTYPE_PCV4SF:
31555 case V8SF_FTYPE_PCFLOAT:
31556 case V4SF_FTYPE_PCFLOAT:
31557 case V4DF_FTYPE_PCV2DF:
31558 case V4DF_FTYPE_PCDOUBLE:
31559 case V2DF_FTYPE_PCDOUBLE:
31560 case VOID_FTYPE_PVOID:
31565 case VOID_FTYPE_PV2SF_V4SF:
31566 case VOID_FTYPE_PV4DI_V4DI:
31567 case VOID_FTYPE_PV2DI_V2DI:
31568 case VOID_FTYPE_PCHAR_V32QI:
31569 case VOID_FTYPE_PCHAR_V16QI:
31570 case VOID_FTYPE_PFLOAT_V8SF:
31571 case VOID_FTYPE_PFLOAT_V4SF:
31572 case VOID_FTYPE_PDOUBLE_V4DF:
31573 case VOID_FTYPE_PDOUBLE_V2DF:
31574 case VOID_FTYPE_PLONGLONG_LONGLONG:
31575 case VOID_FTYPE_PULONGLONG_ULONGLONG:
31576 case VOID_FTYPE_PINT_INT:
31579 /* Reserve memory operand for target. */
31580 memory = ARRAY_SIZE (args);
31582 case V4SF_FTYPE_V4SF_PCV2SF:
31583 case V2DF_FTYPE_V2DF_PCDOUBLE:
31588 case V8SF_FTYPE_PCV8SF_V8SI:
31589 case V4DF_FTYPE_PCV4DF_V4DI:
31590 case V4SF_FTYPE_PCV4SF_V4SI:
31591 case V2DF_FTYPE_PCV2DF_V2DI:
31592 case V8SI_FTYPE_PCV8SI_V8SI:
31593 case V4DI_FTYPE_PCV4DI_V4DI:
31594 case V4SI_FTYPE_PCV4SI_V4SI:
31595 case V2DI_FTYPE_PCV2DI_V2DI:
31600 case VOID_FTYPE_PV8SF_V8SI_V8SF:
31601 case VOID_FTYPE_PV4DF_V4DI_V4DF:
31602 case VOID_FTYPE_PV4SF_V4SI_V4SF:
31603 case VOID_FTYPE_PV2DF_V2DI_V2DF:
31604 case VOID_FTYPE_PV8SI_V8SI_V8SI:
31605 case VOID_FTYPE_PV4DI_V4DI_V4DI:
31606 case VOID_FTYPE_PV4SI_V4SI_V4SI:
31607 case VOID_FTYPE_PV2DI_V2DI_V2DI:
31610 /* Reserve memory operand for target. */
31611 memory = ARRAY_SIZE (args);
31613 case VOID_FTYPE_UINT_UINT_UINT:
31614 case VOID_FTYPE_UINT64_UINT_UINT:
31615 case UCHAR_FTYPE_UINT_UINT_UINT:
31616 case UCHAR_FTYPE_UINT64_UINT_UINT:
31619 memory = ARRAY_SIZE (args);
31620 last_arg_constant = true;
31623 gcc_unreachable ();
31626 gcc_assert (nargs <= ARRAY_SIZE (args));
31628 if (klass == store)
31630 arg = CALL_EXPR_ARG (exp, 0);
31631 op = expand_normal (arg);
31632 gcc_assert (target == 0);
31635 op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
31636 target = gen_rtx_MEM (tmode, op);
31639 target = force_reg (tmode, op);
31647 || !register_operand (target, tmode)
31648 || GET_MODE (target) != tmode)
31649 target = gen_reg_rtx (tmode);
31652 for (i = 0; i < nargs; i++)
31654 enum machine_mode mode = insn_p->operand[i + 1].mode;
31657 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
31658 op = expand_normal (arg);
31659 match = insn_p->operand[i + 1].predicate (op, mode);
31661 if (last_arg_constant && (i + 1) == nargs)
31665 if (icode == CODE_FOR_lwp_lwpvalsi3
31666 || icode == CODE_FOR_lwp_lwpinssi3
31667 || icode == CODE_FOR_lwp_lwpvaldi3
31668 || icode == CODE_FOR_lwp_lwpinsdi3)
31669 error ("the last argument must be a 32-bit immediate");
31671 error ("the last argument must be an 8-bit immediate");
31679 /* This must be the memory operand. */
31680 op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
31681 op = gen_rtx_MEM (mode, op);
31682 gcc_assert (GET_MODE (op) == mode
31683 || GET_MODE (op) == VOIDmode);
31687 /* This must be a register.  */
31688 if (VECTOR_MODE_P (mode))
31689 op = safe_vector_operand (op, mode);
31691 gcc_assert (GET_MODE (op) == mode
31692 || GET_MODE (op) == VOIDmode);
31693 op = copy_to_mode_reg (mode, op);
31698 args[i].mode = mode;
31704 pat = GEN_FCN (icode) (target);
31707 pat = GEN_FCN (icode) (target, args[0].op);
31710 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
31713 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
31716 gcc_unreachable ();
31722 return klass == store ? 0 : target;
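/* Example (illustrative): __builtin_ia32_loadups (V4SF_FTYPE_PCFLOAT) is
   load-class, so its pointer argument becomes the memory operand above;
   __builtin_ia32_storeups (VOID_FTYPE_PFLOAT_V4SF) is store-class and
   therefore returns 0 here.  */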
31725 /* Return the integer constant in ARG. Constrain it to be in the range
31726 of the subparts of VEC_TYPE; issue an error if not. */
31729 get_element_number (tree vec_type, tree arg)
31731 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
31733 if (!host_integerp (arg, 1)
31734 || (elt = tree_low_cst (arg, 1), elt > max))
31736 error ("selector must be an integer constant in the range 0..%wi", max);
31743 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31744 ix86_expand_vector_init. We DO have language-level syntax for this, in
31745 the form of (type){ init-list }. Except that since we can't place emms
31746 instructions from inside the compiler, we can't allow the use of MMX
31747 registers unless the user explicitly asks for it. So we do *not* define
31748 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
31749 we have builtins invoked by mmintrin.h that give us license to emit
31750 these sorts of instructions. */
31753 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
31755 enum machine_mode tmode = TYPE_MODE (type);
31756 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
31757 int i, n_elt = GET_MODE_NUNITS (tmode);
31758 rtvec v = rtvec_alloc (n_elt);
31760 gcc_assert (VECTOR_MODE_P (tmode));
31761 gcc_assert (call_expr_nargs (exp) == n_elt);
31763 for (i = 0; i < n_elt; ++i)
31765 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
31766 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
31769 if (!target || !register_operand (target, tmode))
31770 target = gen_reg_rtx (tmode);
31772 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
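/* Example (illustrative): mmintrin.h implements _mm_set_pi32 as

     (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);

   which arrives here with two SImode arguments and n_elt == 2.  */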
31776 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31777 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
31778 had a language-level syntax for referencing vector elements. */
31781 ix86_expand_vec_ext_builtin (tree exp, rtx target)
31783 enum machine_mode tmode, mode0;
31788 arg0 = CALL_EXPR_ARG (exp, 0);
31789 arg1 = CALL_EXPR_ARG (exp, 1);
31791 op0 = expand_normal (arg0);
31792 elt = get_element_number (TREE_TYPE (arg0), arg1);
31794 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
31795 mode0 = TYPE_MODE (TREE_TYPE (arg0));
31796 gcc_assert (VECTOR_MODE_P (mode0));
31798 op0 = force_reg (mode0, op0);
31800 if (optimize || !target || !register_operand (target, tmode))
31801 target = gen_reg_rtx (tmode);
31803 ix86_expand_vector_extract (true, target, op0, elt);
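/* Example (illustrative): _mm_extract_epi16 (A, N) expands through

     __builtin_ia32_vec_ext_v8hi ((__v8hi) A, N)

   where N must pass get_element_number's range check above.  */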
31808 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31809 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
31810 a language-level syntax for referencing vector elements. */
31813 ix86_expand_vec_set_builtin (tree exp)
31815 enum machine_mode tmode, mode1;
31816 tree arg0, arg1, arg2;
31818 rtx op0, op1, target;
31820 arg0 = CALL_EXPR_ARG (exp, 0);
31821 arg1 = CALL_EXPR_ARG (exp, 1);
31822 arg2 = CALL_EXPR_ARG (exp, 2);
31824 tmode = TYPE_MODE (TREE_TYPE (arg0));
31825 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
31826 gcc_assert (VECTOR_MODE_P (tmode));
31828 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
31829 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
31830 elt = get_element_number (TREE_TYPE (arg0), arg2);
31832 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
31833 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
31835 op0 = force_reg (tmode, op0);
31836 op1 = force_reg (mode1, op1);
31838 /* OP0 is the source of these builtin functions and shouldn't be
31839 modified. Create a copy, use it and return it as target. */
31840 target = gen_reg_rtx (tmode);
31841 emit_move_insn (target, op0);
31842 ix86_expand_vector_set (true, target, op1, elt);
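/* Example (illustrative): _mm_insert_epi16 (A, D, N) maps to

     __builtin_ia32_vec_set_v8hi ((__v8hi) A, D, N)

   and receives the freshly copied vector as the returned target, leaving
   the original operand unmodified.  */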
31847 /* Expand an expression EXP that calls a built-in function,
31848 with result going to TARGET if that's convenient
31849 (and in mode MODE if that's convenient).
31850 SUBTARGET may be used as the target for computing one of EXP's operands.
31851 IGNORE is nonzero if the value is to be ignored. */
31854 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
31855 enum machine_mode mode ATTRIBUTE_UNUSED,
31856 int ignore ATTRIBUTE_UNUSED)
31858 const struct builtin_description *d;
31860 enum insn_code icode;
31861 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
31862 tree arg0, arg1, arg2, arg3, arg4;
31863 rtx op0, op1, op2, op3, op4, pat, insn;
31864 enum machine_mode mode0, mode1, mode2, mode3, mode4;
31865 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
31867 /* For CPU builtins that can be folded, fold first and expand the fold. */
31870 case IX86_BUILTIN_CPU_INIT:
31872 /* Make it call __cpu_indicator_init in libgcc. */
31873 tree call_expr, fndecl, type;
31874 type = build_function_type_list (integer_type_node, NULL_TREE);
31875 fndecl = build_fn_decl ("__cpu_indicator_init", type);
31876 call_expr = build_call_expr (fndecl, 0);
31877 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
31879 case IX86_BUILTIN_CPU_IS:
31880 case IX86_BUILTIN_CPU_SUPPORTS:
31882 tree arg0 = CALL_EXPR_ARG (exp, 0);
31883 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
31884 gcc_assert (fold_expr != NULL_TREE);
31885 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
31889 /* Determine whether the builtin function is available under the current ISA.
31890 Originally the builtin was not created if it wasn't applicable to the
31891 current ISA based on the command line switches. With function specific
31892 options, we need to check in the context of the function making the call
31893 whether it is supported. */
31894 if (ix86_builtins_isa[fcode].isa
31895 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
31897 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
31898 NULL, (enum fpmath_unit) 0, false);
31901 error ("%qE needs unknown isa option", fndecl);
31904 gcc_assert (opts != NULL);
31905 error ("%qE needs isa option %s", fndecl, opts);
31913 case IX86_BUILTIN_MASKMOVQ:
31914 case IX86_BUILTIN_MASKMOVDQU:
31915 icode = (fcode == IX86_BUILTIN_MASKMOVQ
31916 ? CODE_FOR_mmx_maskmovq
31917 : CODE_FOR_sse2_maskmovdqu);
31918 /* Note the arg order is different from the operand order. */
31919 arg1 = CALL_EXPR_ARG (exp, 0);
31920 arg2 = CALL_EXPR_ARG (exp, 1);
31921 arg0 = CALL_EXPR_ARG (exp, 2);
31922 op0 = expand_normal (arg0);
31923 op1 = expand_normal (arg1);
31924 op2 = expand_normal (arg2);
31925 mode0 = insn_data[icode].operand[0].mode;
31926 mode1 = insn_data[icode].operand[1].mode;
31927 mode2 = insn_data[icode].operand[2].mode;
31929 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
31930 op0 = gen_rtx_MEM (mode1, op0);
31932 if (!insn_data[icode].operand[0].predicate (op0, mode0))
31933 op0 = copy_to_mode_reg (mode0, op0);
31934 if (!insn_data[icode].operand[1].predicate (op1, mode1))
31935 op1 = copy_to_mode_reg (mode1, op1);
31936 if (!insn_data[icode].operand[2].predicate (op2, mode2))
31937 op2 = copy_to_mode_reg (mode2, op2);
31938 pat = GEN_FCN (icode) (op0, op1, op2);
31944 case IX86_BUILTIN_LDMXCSR:
31945 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
31946 target = assign_386_stack_local (SImode, SLOT_TEMP);
31947 emit_move_insn (target, op0);
31948 emit_insn (gen_sse_ldmxcsr (target));
31951 case IX86_BUILTIN_STMXCSR:
31952 target = assign_386_stack_local (SImode, SLOT_TEMP);
31953 emit_insn (gen_sse_stmxcsr (target));
31954 return copy_to_mode_reg (SImode, target);
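/* Example (illustrative): _mm_setcsr and _mm_getcsr are thin wrappers
   around these two builtins; the SLOT_TEMP stack slot supplies the memory
   operand the ldmxcsr/stmxcsr insns require.  */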
31956 case IX86_BUILTIN_CLFLUSH:
31957 arg0 = CALL_EXPR_ARG (exp, 0);
31958 op0 = expand_normal (arg0);
31959 icode = CODE_FOR_sse2_clflush;
31960 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
31961 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
31963 emit_insn (gen_sse2_clflush (op0));
31966 case IX86_BUILTIN_MONITOR:
31967 arg0 = CALL_EXPR_ARG (exp, 0);
31968 arg1 = CALL_EXPR_ARG (exp, 1);
31969 arg2 = CALL_EXPR_ARG (exp, 2);
31970 op0 = expand_normal (arg0);
31971 op1 = expand_normal (arg1);
31972 op2 = expand_normal (arg2);
31974 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
31976 op1 = copy_to_mode_reg (SImode, op1);
31978 op2 = copy_to_mode_reg (SImode, op2);
31979 emit_insn (ix86_gen_monitor (op0, op1, op2));
31982 case IX86_BUILTIN_MWAIT:
31983 arg0 = CALL_EXPR_ARG (exp, 0);
31984 arg1 = CALL_EXPR_ARG (exp, 1);
31985 op0 = expand_normal (arg0);
31986 op1 = expand_normal (arg1);
31988 op0 = copy_to_mode_reg (SImode, op0);
31990 op1 = copy_to_mode_reg (SImode, op1);
31991 emit_insn (gen_sse3_mwait (op0, op1));
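/* Example (illustrative): pmmintrin.h's _mm_monitor (p, ext, hints) and
   _mm_mwait (ext, hints) call these builtins; the address is forced into
   a Pmode register and the two hint words into SImode registers, matching
   the EAX/ECX/EDX operands of the hardware insns.  */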
31994 case IX86_BUILTIN_VEC_INIT_V2SI:
31995 case IX86_BUILTIN_VEC_INIT_V4HI:
31996 case IX86_BUILTIN_VEC_INIT_V8QI:
31997 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
31999 case IX86_BUILTIN_VEC_EXT_V2DF:
32000 case IX86_BUILTIN_VEC_EXT_V2DI:
32001 case IX86_BUILTIN_VEC_EXT_V4SF:
32002 case IX86_BUILTIN_VEC_EXT_V4SI:
32003 case IX86_BUILTIN_VEC_EXT_V8HI:
32004 case IX86_BUILTIN_VEC_EXT_V2SI:
32005 case IX86_BUILTIN_VEC_EXT_V4HI:
32006 case IX86_BUILTIN_VEC_EXT_V16QI:
32007 return ix86_expand_vec_ext_builtin (exp, target);
32009 case IX86_BUILTIN_VEC_SET_V2DI:
32010 case IX86_BUILTIN_VEC_SET_V4SF:
32011 case IX86_BUILTIN_VEC_SET_V4SI:
32012 case IX86_BUILTIN_VEC_SET_V8HI:
32013 case IX86_BUILTIN_VEC_SET_V4HI:
32014 case IX86_BUILTIN_VEC_SET_V16QI:
32015 return ix86_expand_vec_set_builtin (exp);
32017 case IX86_BUILTIN_INFQ:
32018 case IX86_BUILTIN_HUGE_VALQ:
32020 REAL_VALUE_TYPE inf;
32024 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
32026 tmp = validize_mem (force_const_mem (mode, tmp));
32029 target = gen_reg_rtx (mode);
32031 emit_move_insn (target, tmp);
32035 case IX86_BUILTIN_RDPMC:
32036 case IX86_BUILTIN_RDTSC:
32037 case IX86_BUILTIN_RDTSCP:
32039 op0 = gen_reg_rtx (DImode);
32040 op1 = gen_reg_rtx (DImode);
32042 if (fcode == IX86_BUILTIN_RDPMC)
32044 arg0 = CALL_EXPR_ARG (exp, 0);
32045 op2 = expand_normal (arg0);
32046 if (!register_operand (op2, SImode))
32047 op2 = copy_to_mode_reg (SImode, op2);
32049 insn = (TARGET_64BIT
32050 ? gen_rdpmc_rex64 (op0, op1, op2)
32051 : gen_rdpmc (op0, op2));
32054 else if (fcode == IX86_BUILTIN_RDTSC)
32056 insn = (TARGET_64BIT
32057 ? gen_rdtsc_rex64 (op0, op1)
32058 : gen_rdtsc (op0));
32063 op2 = gen_reg_rtx (SImode);
32065 insn = (TARGET_64BIT
32066 ? gen_rdtscp_rex64 (op0, op1, op2)
32067 : gen_rdtscp (op0, op2));
32070 arg0 = CALL_EXPR_ARG (exp, 0);
32071 op4 = expand_normal (arg0);
32072 if (!address_operand (op4, VOIDmode))
32074 op4 = convert_memory_address (Pmode, op4);
32075 op4 = copy_addr_to_reg (op4);
32077 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
32081 target = gen_reg_rtx (mode);
32085 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
32086 op1, 1, OPTAB_DIRECT);
32087 op0 = expand_simple_binop (DImode, IOR, op0, op1,
32088 op0, 1, OPTAB_DIRECT);
32091 emit_move_insn (target, op0);
32094 case IX86_BUILTIN_FXSAVE:
32095 case IX86_BUILTIN_FXRSTOR:
32096 case IX86_BUILTIN_FXSAVE64:
32097 case IX86_BUILTIN_FXRSTOR64:
32100 case IX86_BUILTIN_FXSAVE:
32101 icode = CODE_FOR_fxsave;
32103 case IX86_BUILTIN_FXRSTOR:
32104 icode = CODE_FOR_fxrstor;
32106 case IX86_BUILTIN_FXSAVE64:
32107 icode = CODE_FOR_fxsave64;
32109 case IX86_BUILTIN_FXRSTOR64:
32110 icode = CODE_FOR_fxrstor64;
32113 gcc_unreachable ();
32116 arg0 = CALL_EXPR_ARG (exp, 0);
32117 op0 = expand_normal (arg0);
32119 if (!address_operand (op0, VOIDmode))
32121 op0 = convert_memory_address (Pmode, op0);
32122 op0 = copy_addr_to_reg (op0);
32124 op0 = gen_rtx_MEM (BLKmode, op0);
32126 pat = GEN_FCN (icode) (op0);
32131 case IX86_BUILTIN_XSAVE:
32132 case IX86_BUILTIN_XRSTOR:
32133 case IX86_BUILTIN_XSAVE64:
32134 case IX86_BUILTIN_XRSTOR64:
32135 case IX86_BUILTIN_XSAVEOPT:
32136 case IX86_BUILTIN_XSAVEOPT64:
32137 arg0 = CALL_EXPR_ARG (exp, 0);
32138 arg1 = CALL_EXPR_ARG (exp, 1);
32139 op0 = expand_normal (arg0);
32140 op1 = expand_normal (arg1);
32142 if (!address_operand (op0, VOIDmode))
32144 op0 = convert_memory_address (Pmode, op0);
32145 op0 = copy_addr_to_reg (op0);
32147 op0 = gen_rtx_MEM (BLKmode, op0);
32149 op1 = force_reg (DImode, op1);
32153 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
32154 NULL, 1, OPTAB_DIRECT);
32157 case IX86_BUILTIN_XSAVE:
32158 icode = CODE_FOR_xsave_rex64;
32160 case IX86_BUILTIN_XRSTOR:
32161 icode = CODE_FOR_xrstor_rex64;
32163 case IX86_BUILTIN_XSAVE64:
32164 icode = CODE_FOR_xsave64;
32166 case IX86_BUILTIN_XRSTOR64:
32167 icode = CODE_FOR_xrstor64;
32169 case IX86_BUILTIN_XSAVEOPT:
32170 icode = CODE_FOR_xsaveopt_rex64;
32172 case IX86_BUILTIN_XSAVEOPT64:
32173 icode = CODE_FOR_xsaveopt64;
32176 gcc_unreachable ();
32179 op2 = gen_lowpart (SImode, op2);
32180 op1 = gen_lowpart (SImode, op1);
32181 pat = GEN_FCN (icode) (op0, op1, op2);
32187 case IX86_BUILTIN_XSAVE:
32188 icode = CODE_FOR_xsave;
32190 case IX86_BUILTIN_XRSTOR:
32191 icode = CODE_FOR_xrstor;
32193 case IX86_BUILTIN_XSAVEOPT:
32194 icode = CODE_FOR_xsaveopt;
32197 gcc_unreachable ();
32199 pat = GEN_FCN (icode) (op0, op1);
32206 case IX86_BUILTIN_LLWPCB:
32207 arg0 = CALL_EXPR_ARG (exp, 0);
32208 op0 = expand_normal (arg0);
32209 icode = CODE_FOR_lwp_llwpcb;
32210 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
32211 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
32212 emit_insn (gen_lwp_llwpcb (op0));
32215 case IX86_BUILTIN_SLWPCB:
32216 icode = CODE_FOR_lwp_slwpcb;
32218 || !insn_data[icode].operand[0].predicate (target, Pmode))
32219 target = gen_reg_rtx (Pmode);
32220 emit_insn (gen_lwp_slwpcb (target));
32223 case IX86_BUILTIN_BEXTRI32:
32224 case IX86_BUILTIN_BEXTRI64:
32225 arg0 = CALL_EXPR_ARG (exp, 0);
32226 arg1 = CALL_EXPR_ARG (exp, 1);
32227 op0 = expand_normal (arg0);
32228 op1 = expand_normal (arg1);
32229 icode = (fcode == IX86_BUILTIN_BEXTRI32
32230 ? CODE_FOR_tbm_bextri_si
32231 : CODE_FOR_tbm_bextri_di);
32232 if (!CONST_INT_P (op1))
32234 error ("last argument must be an immediate");
32239 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
32240 unsigned char lsb_index = INTVAL (op1) & 0xFF;
32241 op1 = GEN_INT (length);
32242 op2 = GEN_INT (lsb_index);
32243 pat = GEN_FCN (icode) (target, op0, op1, op2);
32249 case IX86_BUILTIN_RDRAND16_STEP:
32250 icode = CODE_FOR_rdrandhi_1;
32254 case IX86_BUILTIN_RDRAND32_STEP:
32255 icode = CODE_FOR_rdrandsi_1;
32259 case IX86_BUILTIN_RDRAND64_STEP:
32260 icode = CODE_FOR_rdranddi_1;
32264 op0 = gen_reg_rtx (mode0);
32265 emit_insn (GEN_FCN (icode) (op0));
32267 arg0 = CALL_EXPR_ARG (exp, 0);
32268 op1 = expand_normal (arg0);
32269 if (!address_operand (op1, VOIDmode))
32271 op1 = convert_memory_address (Pmode, op1);
32272 op1 = copy_addr_to_reg (op1);
32274 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
32276 op1 = gen_reg_rtx (SImode);
32277 emit_move_insn (op1, CONST1_RTX (SImode));
32279 /* Emit SImode conditional move. */
32280 if (mode0 == HImode)
32282 op2 = gen_reg_rtx (SImode);
32283 emit_insn (gen_zero_extendhisi2 (op2, op0));
32285 else if (mode0 == SImode)
32288 op2 = gen_rtx_SUBREG (SImode, op0, 0);
32291 target = gen_reg_rtx (SImode);
32293 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
32295 emit_insn (gen_rtx_SET (VOIDmode, target,
32296 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
32299 case IX86_BUILTIN_RDSEED16_STEP:
32300 icode = CODE_FOR_rdseedhi_1;
32304 case IX86_BUILTIN_RDSEED32_STEP:
32305 icode = CODE_FOR_rdseedsi_1;
32309 case IX86_BUILTIN_RDSEED64_STEP:
32310 icode = CODE_FOR_rdseeddi_1;
32314 op0 = gen_reg_rtx (mode0);
32315 emit_insn (GEN_FCN (icode) (op0));
32317 arg0 = CALL_EXPR_ARG (exp, 0);
32318 op1 = expand_normal (arg0);
32319 if (!address_operand (op1, VOIDmode))
32321 op1 = convert_memory_address (Pmode, op1);
32322 op1 = copy_addr_to_reg (op1);
32324 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
32326 op2 = gen_reg_rtx (QImode);
32328 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
32330 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
32333 target = gen_reg_rtx (SImode);
32335 emit_insn (gen_zero_extendqisi2 (target, op2));
32338 case IX86_BUILTIN_ADDCARRYX32:
32339 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
32343 case IX86_BUILTIN_ADDCARRYX64:
32344 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
32348 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
32349 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
32350 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
32351 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
32353 op0 = gen_reg_rtx (QImode);
32355 /* Generate CF from input operand. */
32356 op1 = expand_normal (arg0);
32357 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
32358 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
32360 /* Generate an ADCX (or plain add-with-carry) insn to compute X+Y+CF.  */
32361 op2 = expand_normal (arg1);
32362 op3 = expand_normal (arg2);
32365 op2 = copy_to_mode_reg (mode0, op2);
32367 op3 = copy_to_mode_reg (mode0, op3);
32369 op0 = gen_reg_rtx (mode0);
32371 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
32372 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
32373 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
32375 /* Store the result. */
32376 op4 = expand_normal (arg3);
32377 if (!address_operand (op4, VOIDmode))
32379 op4 = convert_memory_address (Pmode, op4);
32380 op4 = copy_addr_to_reg (op4);
32382 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
32384 /* Return current CF value. */
32386 target = gen_reg_rtx (QImode);
32388 PUT_MODE (pat, QImode);
32389 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
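/* [Editorial sketch, not part of the original file.] The expansion
   above first recreates CF from the c_in byte (adding -1 carries out
   of QImode exactly when c_in was nonzero), feeds it to ADCX or ADC,
   stores the sum, and finally returns the resulting CF via the QImode
   setcc built from PAT. Hedged usage example for multi-word addition,
   assuming -madx and _addcarryx_u32 from <immintrin.h>: */
#if 0
#include <immintrin.h>

/* Add two 96-bit numbers held as three 32-bit words each, letting the
   carry propagate through the chain. */
unsigned char
add96 (const unsigned int a[3], const unsigned int b[3],
       unsigned int out[3])
{
  unsigned char c = 0;
  c = _addcarryx_u32 (c, a[0], b[0], &out[0]);
  c = _addcarryx_u32 (c, a[1], b[1], &out[1]);
  c = _addcarryx_u32 (c, a[2], b[2], &out[2]);
  return c;
}
#endif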
32392 case IX86_BUILTIN_GATHERSIV2DF:
32393 icode = CODE_FOR_avx2_gathersiv2df;
32395 case IX86_BUILTIN_GATHERSIV4DF:
32396 icode = CODE_FOR_avx2_gathersiv4df;
32398 case IX86_BUILTIN_GATHERDIV2DF:
32399 icode = CODE_FOR_avx2_gatherdiv2df;
32401 case IX86_BUILTIN_GATHERDIV4DF:
32402 icode = CODE_FOR_avx2_gatherdiv4df;
32404 case IX86_BUILTIN_GATHERSIV4SF:
32405 icode = CODE_FOR_avx2_gathersiv4sf;
32407 case IX86_BUILTIN_GATHERSIV8SF:
32408 icode = CODE_FOR_avx2_gathersiv8sf;
32410 case IX86_BUILTIN_GATHERDIV4SF:
32411 icode = CODE_FOR_avx2_gatherdiv4sf;
32413 case IX86_BUILTIN_GATHERDIV8SF:
32414 icode = CODE_FOR_avx2_gatherdiv8sf;
32416 case IX86_BUILTIN_GATHERSIV2DI:
32417 icode = CODE_FOR_avx2_gathersiv2di;
32419 case IX86_BUILTIN_GATHERSIV4DI:
32420 icode = CODE_FOR_avx2_gathersiv4di;
32422 case IX86_BUILTIN_GATHERDIV2DI:
32423 icode = CODE_FOR_avx2_gatherdiv2di;
32425 case IX86_BUILTIN_GATHERDIV4DI:
32426 icode = CODE_FOR_avx2_gatherdiv4di;
32428 case IX86_BUILTIN_GATHERSIV4SI:
32429 icode = CODE_FOR_avx2_gathersiv4si;
32431 case IX86_BUILTIN_GATHERSIV8SI:
32432 icode = CODE_FOR_avx2_gathersiv8si;
32434 case IX86_BUILTIN_GATHERDIV4SI:
32435 icode = CODE_FOR_avx2_gatherdiv4si;
32437 case IX86_BUILTIN_GATHERDIV8SI:
32438 icode = CODE_FOR_avx2_gatherdiv8si;
32440 case IX86_BUILTIN_GATHERALTSIV4DF:
32441 icode = CODE_FOR_avx2_gathersiv4df;
32443 case IX86_BUILTIN_GATHERALTDIV8SF:
32444 icode = CODE_FOR_avx2_gatherdiv8sf;
32446 case IX86_BUILTIN_GATHERALTSIV4DI:
32447 icode = CODE_FOR_avx2_gathersiv4di;
32449 case IX86_BUILTIN_GATHERALTDIV8SI:
32450 icode = CODE_FOR_avx2_gatherdiv8si;
32454 arg0 = CALL_EXPR_ARG (exp, 0);
32455 arg1 = CALL_EXPR_ARG (exp, 1);
32456 arg2 = CALL_EXPR_ARG (exp, 2);
32457 arg3 = CALL_EXPR_ARG (exp, 3);
32458 arg4 = CALL_EXPR_ARG (exp, 4);
32459 op0 = expand_normal (arg0);
32460 op1 = expand_normal (arg1);
32461 op2 = expand_normal (arg2);
32462 op3 = expand_normal (arg3);
32463 op4 = expand_normal (arg4);
32464 /* Note the arg order is different from the operand order. */
32465 mode0 = insn_data[icode].operand[1].mode;
32466 mode2 = insn_data[icode].operand[3].mode;
32467 mode3 = insn_data[icode].operand[4].mode;
32468 mode4 = insn_data[icode].operand[5].mode;
32470 if (target == NULL_RTX
32471 || GET_MODE (target) != insn_data[icode].operand[0].mode)
32472 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
32474 subtarget = target;
32476 if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
32477 || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
32479 rtx half = gen_reg_rtx (V4SImode);
32480 if (!nonimmediate_operand (op2, V8SImode))
32481 op2 = copy_to_mode_reg (V8SImode, op2);
32482 emit_insn (gen_vec_extract_lo_v8si (half, op2));
32485 else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
32486 || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
32488 rtx (*gen) (rtx, rtx);
32489 rtx half = gen_reg_rtx (mode0);
32490 if (mode0 == V4SFmode)
32491 gen = gen_vec_extract_lo_v8sf;
32493 gen = gen_vec_extract_lo_v8si;
32494 if (!nonimmediate_operand (op0, GET_MODE (op0)))
32495 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
32496 emit_insn (gen (half, op0));
32498 if (!nonimmediate_operand (op3, GET_MODE (op3)))
32499 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
32500 emit_insn (gen (half, op3));
32504 /* Force memory operand only with base register here. But we
32505 don't want to do it on memory operand for other builtin
32506 functions. */
32507 op1 = force_reg (Pmode, convert_to_mode (Pmode, op1, 1));
32509 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32510 op0 = copy_to_mode_reg (mode0, op0);
32511 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
32512 op1 = copy_to_mode_reg (Pmode, op1);
32513 if (!insn_data[icode].operand[3].predicate (op2, mode2))
32514 op2 = copy_to_mode_reg (mode2, op2);
32515 if (!insn_data[icode].operand[4].predicate (op3, mode3))
32516 op3 = copy_to_mode_reg (mode3, op3);
32517 if (!insn_data[icode].operand[5].predicate (op4, mode4))
32519 error ("last argument must be scale 1, 2, 4, 8");
32523 /* Optimize. If mask is known to have all high bits set,
32524 replace op0 with pc_rtx to signal that the instruction
32525 overwrites the whole destination and doesn't use its
32526 previous contents. */
32529 if (TREE_CODE (arg3) == VECTOR_CST)
32531 unsigned int negative = 0;
32532 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
32534 tree cst = VECTOR_CST_ELT (arg3, i);
32535 if (TREE_CODE (cst) == INTEGER_CST
32536 && tree_int_cst_sign_bit (cst))
32538 else if (TREE_CODE (cst) == REAL_CST
32539 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
32542 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
32545 else if (TREE_CODE (arg3) == SSA_NAME)
32547 /* Recognize also when mask is like:
32548 __v2df src = _mm_setzero_pd ();
32549 __v2df mask = _mm_cmpeq_pd (src, src);
32550 or
32551 __v8sf src = _mm256_setzero_ps ();
32552 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
32553 as that is a cheaper way to load all ones into
32554 a register than having to load a constant from
32555 memory. */
32556 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
32557 if (is_gimple_call (def_stmt))
32559 tree fndecl = gimple_call_fndecl (def_stmt);
32561 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32562 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
32564 case IX86_BUILTIN_CMPPD:
32565 case IX86_BUILTIN_CMPPS:
32566 case IX86_BUILTIN_CMPPD256:
32567 case IX86_BUILTIN_CMPPS256:
32568 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
32571 case IX86_BUILTIN_CMPEQPD:
32572 case IX86_BUILTIN_CMPEQPS:
32573 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
32574 && initializer_zerop (gimple_call_arg (def_stmt,
32585 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
32590 if (fcode == IX86_BUILTIN_GATHERDIV8SF
32591 || fcode == IX86_BUILTIN_GATHERDIV8SI)
32593 enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
32594 ? V4SFmode : V4SImode;
32595 if (target == NULL_RTX)
32596 target = gen_reg_rtx (tmode);
32597 if (tmode == V4SFmode)
32598 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
32600 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
32603 target = subtarget;
32607 case IX86_BUILTIN_XABORT:
32608 icode = CODE_FOR_xabort;
32609 arg0 = CALL_EXPR_ARG (exp, 0);
32610 op0 = expand_normal (arg0);
32611 mode0 = insn_data[icode].operand[0].mode;
32612 if (!insn_data[icode].operand[0].predicate (op0, mode0))
32614 error ("the xabort's argument must be an 8-bit immediate");
32617 emit_insn (gen_xabort (op0));
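/* [Editorial sketch, not part of the original file.] Hedged usage
   example for the RTM builtins, assuming -mrtm and the intrinsics
   from <immintrin.h>; note that the _xabort argument must be a
   compile-time 8-bit constant, matching the predicate check above: */
#if 0
#include <immintrin.h>

int
try_increment (int *p)
{
  unsigned int status = _xbegin ();
  if (status == _XBEGIN_STARTED)
    {
      if (*p < 0)
	_xabort (0x42);	/* The imm8 becomes the abort code. */
      *p += 1;
      _xend ();
      return 1;
    }
  return 0;		/* Transaction aborted or not started. */
}
#endif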
32624 for (i = 0, d = bdesc_special_args;
32625 i < ARRAY_SIZE (bdesc_special_args);
32627 if (d->code == fcode)
32628 return ix86_expand_special_args_builtin (d, exp, target);
32630 for (i = 0, d = bdesc_args;
32631 i < ARRAY_SIZE (bdesc_args);
32633 if (d->code == fcode)
32636 case IX86_BUILTIN_FABSQ:
32637 case IX86_BUILTIN_COPYSIGNQ:
32639 /* Emit a normal call if SSE isn't available. */
32640 return expand_call (exp, target, ignore);
32642 return ix86_expand_args_builtin (d, exp, target);
32645 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
32646 if (d->code == fcode)
32647 return ix86_expand_sse_comi (d, exp, target);
32649 for (i = 0, d = bdesc_pcmpestr;
32650 i < ARRAY_SIZE (bdesc_pcmpestr);
32652 if (d->code == fcode)
32653 return ix86_expand_sse_pcmpestr (d, exp, target);
32655 for (i = 0, d = bdesc_pcmpistr;
32656 i < ARRAY_SIZE (bdesc_pcmpistr);
32658 if (d->code == fcode)
32659 return ix86_expand_sse_pcmpistr (d, exp, target);
32661 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
32662 if (d->code == fcode)
32663 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
32664 (enum ix86_builtin_func_type)
32665 d->flag, d->comparison);
32667 gcc_unreachable ();
32670 /* Returns a function decl for a vectorized version of the builtin function
32671 with builtin function code FN and the result vector type TYPE, or NULL_TREE
32672 if it is not available. */
32675 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
32676 tree type_in)
32678 enum machine_mode in_mode, out_mode;
32680 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
32682 if (TREE_CODE (type_out) != VECTOR_TYPE
32683 || TREE_CODE (type_in) != VECTOR_TYPE
32684 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
32687 out_mode = TYPE_MODE (TREE_TYPE (type_out));
32688 out_n = TYPE_VECTOR_SUBPARTS (type_out);
32689 in_mode = TYPE_MODE (TREE_TYPE (type_in));
32690 in_n = TYPE_VECTOR_SUBPARTS (type_in);
32694 case BUILT_IN_SQRT:
32695 if (out_mode == DFmode && in_mode == DFmode)
32697 if (out_n == 2 && in_n == 2)
32698 return ix86_builtins[IX86_BUILTIN_SQRTPD];
32699 else if (out_n == 4 && in_n == 4)
32700 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
32704 case BUILT_IN_SQRTF:
32705 if (out_mode == SFmode && in_mode == SFmode)
32707 if (out_n == 4 && in_n == 4)
32708 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
32709 else if (out_n == 8 && in_n == 8)
32710 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
32714 case BUILT_IN_IFLOOR:
32715 case BUILT_IN_LFLOOR:
32716 case BUILT_IN_LLFLOOR:
32717 /* The round insn does not trap on denormals. */
32718 if (flag_trapping_math || !TARGET_ROUND)
32721 if (out_mode == SImode && in_mode == DFmode)
32723 if (out_n == 4 && in_n == 2)
32724 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
32725 else if (out_n == 8 && in_n == 4)
32726 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
32730 case BUILT_IN_IFLOORF:
32731 case BUILT_IN_LFLOORF:
32732 case BUILT_IN_LLFLOORF:
32733 /* The round insn does not trap on denormals. */
32734 if (flag_trapping_math || !TARGET_ROUND)
32737 if (out_mode == SImode && in_mode == SFmode)
32739 if (out_n == 4 && in_n == 4)
32740 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
32741 else if (out_n == 8 && in_n == 8)
32742 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
32746 case BUILT_IN_ICEIL:
32747 case BUILT_IN_LCEIL:
32748 case BUILT_IN_LLCEIL:
32749 /* The round insn does not trap on denormals. */
32750 if (flag_trapping_math || !TARGET_ROUND)
32753 if (out_mode == SImode && in_mode == DFmode)
32755 if (out_n == 4 && in_n == 2)
32756 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
32757 else if (out_n == 8 && in_n == 4)
32758 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
32762 case BUILT_IN_ICEILF:
32763 case BUILT_IN_LCEILF:
32764 case BUILT_IN_LLCEILF:
32765 /* The round insn does not trap on denormals. */
32766 if (flag_trapping_math || !TARGET_ROUND)
32769 if (out_mode == SImode && in_mode == SFmode)
32771 if (out_n == 4 && in_n == 4)
32772 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
32773 else if (out_n == 8 && in_n == 8)
32774 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
32778 case BUILT_IN_IRINT:
32779 case BUILT_IN_LRINT:
32780 case BUILT_IN_LLRINT:
32781 if (out_mode == SImode && in_mode == DFmode)
32783 if (out_n == 4 && in_n == 2)
32784 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
32785 else if (out_n == 8 && in_n == 4)
32786 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
32790 case BUILT_IN_IRINTF:
32791 case BUILT_IN_LRINTF:
32792 case BUILT_IN_LLRINTF:
32793 if (out_mode == SImode && in_mode == SFmode)
32795 if (out_n == 4 && in_n == 4)
32796 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
32797 else if (out_n == 8 && in_n == 8)
32798 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
32802 case BUILT_IN_IROUND:
32803 case BUILT_IN_LROUND:
32804 case BUILT_IN_LLROUND:
32805 /* The round insn does not trap on denormals. */
32806 if (flag_trapping_math || !TARGET_ROUND)
32809 if (out_mode == SImode && in_mode == DFmode)
32811 if (out_n == 4 && in_n == 2)
32812 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
32813 else if (out_n == 8 && in_n == 4)
32814 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
32818 case BUILT_IN_IROUNDF:
32819 case BUILT_IN_LROUNDF:
32820 case BUILT_IN_LLROUNDF:
32821 /* The round insn does not trap on denormals. */
32822 if (flag_trapping_math || !TARGET_ROUND)
32825 if (out_mode == SImode && in_mode == SFmode)
32827 if (out_n == 4 && in_n == 4)
32828 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
32829 else if (out_n == 8 && in_n == 8)
32830 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
32834 case BUILT_IN_COPYSIGN:
32835 if (out_mode == DFmode && in_mode == DFmode)
32837 if (out_n == 2 && in_n == 2)
32838 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
32839 else if (out_n == 4 && in_n == 4)
32840 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
32844 case BUILT_IN_COPYSIGNF:
32845 if (out_mode == SFmode && in_mode == SFmode)
32847 if (out_n == 4 && in_n == 4)
32848 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
32849 else if (out_n == 8 && in_n == 8)
32850 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
32854 case BUILT_IN_FLOOR:
32855 /* The round insn does not trap on denormals. */
32856 if (flag_trapping_math || !TARGET_ROUND)
32859 if (out_mode == DFmode && in_mode == DFmode)
32861 if (out_n == 2 && in_n == 2)
32862 return ix86_builtins[IX86_BUILTIN_FLOORPD];
32863 else if (out_n == 4 && in_n == 4)
32864 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
32868 case BUILT_IN_FLOORF:
32869 /* The round insn does not trap on denormals. */
32870 if (flag_trapping_math || !TARGET_ROUND)
32873 if (out_mode == SFmode && in_mode == SFmode)
32875 if (out_n == 4 && in_n == 4)
32876 return ix86_builtins[IX86_BUILTIN_FLOORPS];
32877 else if (out_n == 8 && in_n == 8)
32878 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
32882 case BUILT_IN_CEIL:
32883 /* The round insn does not trap on denormals. */
32884 if (flag_trapping_math || !TARGET_ROUND)
32887 if (out_mode == DFmode && in_mode == DFmode)
32889 if (out_n == 2 && in_n == 2)
32890 return ix86_builtins[IX86_BUILTIN_CEILPD];
32891 else if (out_n == 4 && in_n == 4)
32892 return ix86_builtins[IX86_BUILTIN_CEILPD256];
32896 case BUILT_IN_CEILF:
32897 /* The round insn does not trap on denormals. */
32898 if (flag_trapping_math || !TARGET_ROUND)
32901 if (out_mode == SFmode && in_mode == SFmode)
32903 if (out_n == 4 && in_n == 4)
32904 return ix86_builtins[IX86_BUILTIN_CEILPS];
32905 else if (out_n == 8 && in_n == 8)
32906 return ix86_builtins[IX86_BUILTIN_CEILPS256];
32910 case BUILT_IN_TRUNC:
32911 /* The round insn does not trap on denormals. */
32912 if (flag_trapping_math || !TARGET_ROUND)
32915 if (out_mode == DFmode && in_mode == DFmode)
32917 if (out_n == 2 && in_n == 2)
32918 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
32919 else if (out_n == 4 && in_n == 4)
32920 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
32924 case BUILT_IN_TRUNCF:
32925 /* The round insn does not trap on denormals. */
32926 if (flag_trapping_math || !TARGET_ROUND)
32929 if (out_mode == SFmode && in_mode == SFmode)
32931 if (out_n == 4 && in_n == 4)
32932 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
32933 else if (out_n == 8 && in_n == 8)
32934 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
32938 case BUILT_IN_RINT:
32939 /* The round insn does not trap on denormals. */
32940 if (flag_trapping_math || !TARGET_ROUND)
32943 if (out_mode == DFmode && in_mode == DFmode)
32945 if (out_n == 2 && in_n == 2)
32946 return ix86_builtins[IX86_BUILTIN_RINTPD];
32947 else if (out_n == 4 && in_n == 4)
32948 return ix86_builtins[IX86_BUILTIN_RINTPD256];
32952 case BUILT_IN_RINTF:
32953 /* The round insn does not trap on denormals. */
32954 if (flag_trapping_math || !TARGET_ROUND)
32957 if (out_mode == SFmode && in_mode == SFmode)
32959 if (out_n == 4 && in_n == 4)
32960 return ix86_builtins[IX86_BUILTIN_RINTPS];
32961 else if (out_n == 8 && in_n == 8)
32962 return ix86_builtins[IX86_BUILTIN_RINTPS256];
32966 case BUILT_IN_ROUND:
32967 /* The round insn does not trap on denormals. */
32968 if (flag_trapping_math || !TARGET_ROUND)
32971 if (out_mode == DFmode && in_mode == DFmode)
32973 if (out_n == 2 && in_n == 2)
32974 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
32975 else if (out_n == 4 && in_n == 4)
32976 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
32980 case BUILT_IN_ROUNDF:
32981 /* The round insn does not trap on denormals. */
32982 if (flag_trapping_math || !TARGET_ROUND)
32985 if (out_mode == SFmode && in_mode == SFmode)
32987 if (out_n == 4 && in_n == 4)
32988 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
32989 else if (out_n == 8 && in_n == 8)
32990 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
32995 if (out_mode == DFmode && in_mode == DFmode)
32997 if (out_n == 2 && in_n == 2)
32998 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
32999 if (out_n == 4 && in_n == 4)
33000 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
33004 case BUILT_IN_FMAF:
33005 if (out_mode == SFmode && in_mode == SFmode)
33007 if (out_n == 4 && in_n == 4)
33008 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
33009 if (out_n == 8 && in_n == 8)
33010 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
33018 /* Dispatch to a handler for a vectorization library. */
33019 if (ix86_veclib_handler)
33020 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
33021 type_in);
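/* [Editorial sketch, not part of the original file.] The hook above is
   what lets the vectorizer replace scalar math builtins with packed
   equivalents. For instance (assumption: -O3 -mavx -ffast-math), the
   loop below can be vectorized through BUILT_IN_SQRT ->
   IX86_BUILTIN_SQRTPD256, i.e. vsqrtpd on V4DFmode: */
#if 0
void
vec_sqrt (double *a, const double *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = __builtin_sqrt (b[i]);
}
#endif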
33026 /* Handler for an SVML-style interface to
33027 a library with vectorized intrinsics. */
33030 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
33033 tree fntype, new_fndecl, args;
33036 enum machine_mode el_mode, in_mode;
33039 /* The SVML is suitable for unsafe math only. */
33040 if (!flag_unsafe_math_optimizations)
33043 el_mode = TYPE_MODE (TREE_TYPE (type_out));
33044 n = TYPE_VECTOR_SUBPARTS (type_out);
33045 in_mode = TYPE_MODE (TREE_TYPE (type_in));
33046 in_n = TYPE_VECTOR_SUBPARTS (type_in);
33047 if (el_mode != in_mode
33055 case BUILT_IN_LOG10:
33057 case BUILT_IN_TANH:
33059 case BUILT_IN_ATAN:
33060 case BUILT_IN_ATAN2:
33061 case BUILT_IN_ATANH:
33062 case BUILT_IN_CBRT:
33063 case BUILT_IN_SINH:
33065 case BUILT_IN_ASINH:
33066 case BUILT_IN_ASIN:
33067 case BUILT_IN_COSH:
33069 case BUILT_IN_ACOSH:
33070 case BUILT_IN_ACOS:
33071 if (el_mode != DFmode || n != 2)
33075 case BUILT_IN_EXPF:
33076 case BUILT_IN_LOGF:
33077 case BUILT_IN_LOG10F:
33078 case BUILT_IN_POWF:
33079 case BUILT_IN_TANHF:
33080 case BUILT_IN_TANF:
33081 case BUILT_IN_ATANF:
33082 case BUILT_IN_ATAN2F:
33083 case BUILT_IN_ATANHF:
33084 case BUILT_IN_CBRTF:
33085 case BUILT_IN_SINHF:
33086 case BUILT_IN_SINF:
33087 case BUILT_IN_ASINHF:
33088 case BUILT_IN_ASINF:
33089 case BUILT_IN_COSHF:
33090 case BUILT_IN_COSF:
33091 case BUILT_IN_ACOSHF:
33092 case BUILT_IN_ACOSF:
33093 if (el_mode != SFmode || n != 4)
33101 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
33103 if (fn == BUILT_IN_LOGF)
33104 strcpy (name, "vmlsLn4");
33105 else if (fn == BUILT_IN_LOG)
33106 strcpy (name, "vmldLn2");
33109 sprintf (name, "vmls%s", bname+10);
33110 name[strlen (name)-1] = '4';
33113 sprintf (name, "vmld%s2", bname+10);
33115 /* Convert to uppercase. */
33116 name[4] &= ~0x20;
33119 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
33121 args = TREE_CHAIN (args))
33125 fntype = build_function_type_list (type_out, type_in, NULL);
33127 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
33129 /* Build a function declaration for the vectorized function. */
33130 new_fndecl = build_decl (BUILTINS_LOCATION,
33131 FUNCTION_DECL, get_identifier (name), fntype);
33132 TREE_PUBLIC (new_fndecl) = 1;
33133 DECL_EXTERNAL (new_fndecl) = 1;
33134 DECL_IS_NOVOPS (new_fndecl) = 1;
33135 TREE_READONLY (new_fndecl) = 1;
33137 return new_fndecl;
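/* [Editorial note] Worked example of the mangling above: for
   BUILT_IN_SINF, bname is "__builtin_sinf" and bname+10 is "sinf", so
   the SFmode branch produces "vmlssinf", the trailing character is
   overwritten to give "vmlssin4", and the uppercasing step yields
   "vmlsSin4". BUILT_IN_SIN likewise becomes "vmldSin2". log and logf
   are special-cased, presumably because SVML names the natural
   logarithm "Ln" rather than "Log". */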
33140 /* Handler for an ACML-style interface to
33141 a library with vectorized intrinsics. */
33144 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
33146 char name[20] = "__vr.._";
33147 tree fntype, new_fndecl, args;
33150 enum machine_mode el_mode, in_mode;
33153 /* The ACML is 64-bit only and suitable for unsafe math only, as
33154 it does not correctly support parts of IEEE with the required
33155 precision, such as denormals. */
33157 || !flag_unsafe_math_optimizations)
33160 el_mode = TYPE_MODE (TREE_TYPE (type_out));
33161 n = TYPE_VECTOR_SUBPARTS (type_out);
33162 in_mode = TYPE_MODE (TREE_TYPE (type_in));
33163 in_n = TYPE_VECTOR_SUBPARTS (type_in);
33164 if (el_mode != in_mode
33174 case BUILT_IN_LOG2:
33175 case BUILT_IN_LOG10:
33176 name[4] = 'd';
33177 name[5] = '2';
33178 if (el_mode != DFmode
33183 case BUILT_IN_SINF:
33184 case BUILT_IN_COSF:
33185 case BUILT_IN_EXPF:
33186 case BUILT_IN_POWF:
33187 case BUILT_IN_LOGF:
33188 case BUILT_IN_LOG2F:
33189 case BUILT_IN_LOG10F:
33190 name[4] = 's';
33191 name[5] = '4';
33192 if (el_mode != SFmode
33201 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
33202 sprintf (name + 7, "%s", bname+10);
33205 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
33207 args = TREE_CHAIN (args))
33211 fntype = build_function_type_list (type_out, type_in, NULL);
33213 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
33215 /* Build a function declaration for the vectorized function. */
33216 new_fndecl = build_decl (BUILTINS_LOCATION,
33217 FUNCTION_DECL, get_identifier (name), fntype);
33218 TREE_PUBLIC (new_fndecl) = 1;
33219 DECL_EXTERNAL (new_fndecl) = 1;
33220 DECL_IS_NOVOPS (new_fndecl) = 1;
33221 TREE_READONLY (new_fndecl) = 1;
33223 return new_fndecl;
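/* [Editorial note] Worked example of the "__vr.._" template above:
   the double-precision cases patch it to "__vrd2_" and the
   single-precision cases to "__vrs4_", and the builtin's own name is
   then appended at name + 7, so BUILT_IN_SIN resolves to "__vrd2_sin"
   and BUILT_IN_SINF to "__vrs4_sinf". */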
33226 /* Returns a decl of a function that implements gather load with
33227 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
33228 Return NULL_TREE if it is not available. */
33231 ix86_vectorize_builtin_gather (const_tree mem_vectype,
33232 const_tree index_type, int scale)
33235 enum ix86_builtins code;
33240 if ((TREE_CODE (index_type) != INTEGER_TYPE
33241 && !POINTER_TYPE_P (index_type))
33242 || (TYPE_MODE (index_type) != SImode
33243 && TYPE_MODE (index_type) != DImode))
33246 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
33249 /* The v*gather* insns sign-extend the index to pointer mode. */
33250 if (TYPE_PRECISION (index_type) < POINTER_SIZE
33251 && TYPE_UNSIGNED (index_type))
33256 || (scale & (scale - 1)) != 0)
33259 si = TYPE_MODE (index_type) == SImode;
33260 switch (TYPE_MODE (mem_vectype))
33263 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
33266 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
33269 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
33272 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
33275 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
33278 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
33281 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
33284 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
33290 return ix86_builtins[code];
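/* [Editorial sketch, not part of the original file.] This hook is how
   indexed loads become AVX2 gathers. For instance (assumption: -O3
   -mavx2), the indirection below can be vectorized through
   IX86_BUILTIN_GATHERSIV4DF into vgatherdpd: */
#if 0
void
gather (double *out, const double *base, const int *idx, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = base[idx[i]];
}
#endif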
33293 /* Returns a decl of a target-specific builtin that implements
33294 the reciprocal of the function, or NULL_TREE if not available. */
33297 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
33298 bool sqrt ATTRIBUTE_UNUSED)
33300 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
33301 && flag_finite_math_only && !flag_trapping_math
33302 && flag_unsafe_math_optimizations))
33306 /* Machine dependent builtins. */
33309 /* Vectorized version of sqrt to rsqrt conversion. */
33310 case IX86_BUILTIN_SQRTPS_NR:
33311 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
33313 case IX86_BUILTIN_SQRTPS_NR256:
33314 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
33320 /* Normal builtins. */
33323 /* Sqrt to rsqrt conversion. */
33324 case BUILT_IN_SQRTF:
33325 return ix86_builtins[IX86_BUILTIN_RSQRTF];
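/* [Editorial sketch, not part of the original file.] The rsqrt
   conversion above only fires under the guards checked at the top of
   the function (SSE math, finite-only, non-trapping, unsafe math),
   because RSQRTSS/RSQRTPS give only about 12 bits of precision and
   the compiler must add a Newton-Raphson refinement step. E.g.
   (assumption: -O2 -ffast-math -mrecip), the function below can
   compile to rsqrtss plus a refinement instead of sqrtss and divss: */
#if 0
#include <math.h>

float
inv_sqrt (float x)
{
  return 1.0f / sqrtf (x);
}
#endif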
33332 /* Helper for avx_vpermilps256_operand et al. This is also used by
33333 the expansion functions to turn the parallel back into a mask.
33334 The return value is 0 for no match and the imm8+1 for a match. */
33337 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
33339 unsigned i, nelt = GET_MODE_NUNITS (mode);
33341 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
33343 if (XVECLEN (par, 0) != (int) nelt)
33346 /* Validate that all of the elements are constants, and not totally
33347 out of range. Copy the data into an integral array to make the
33348 subsequent checks easier. */
33349 for (i = 0; i < nelt; ++i)
33351 rtx er = XVECEXP (par, 0, i);
33352 unsigned HOST_WIDE_INT ei;
33354 if (!CONST_INT_P (er))
33365 /* In the 256-bit DFmode case, we can only move elements within
33366 a 128-bit lane. */
33367 for (i = 0; i < 2; ++i)
33371 mask |= ipar[i] << i;
33373 for (i = 2; i < 4; ++i)
33377 mask |= (ipar[i] - 2) << i;
33382 /* In the 256-bit SFmode case, we have full freedom of movement
33383 within the low 128-bit lane, but the high 128-bit lane must
33384 mirror the exact same pattern. */
33385 for (i = 0; i < 4; ++i)
33386 if (ipar[i] + 4 != ipar[i + 4])
33393 /* In the 128-bit case, we've full freedom in the placement of
33394 the elements from the source operand. */
33395 for (i = 0; i < nelt; ++i)
33396 mask |= ipar[i] << (i * (nelt / 2));
33400 gcc_unreachable ();
33403 /* Make sure success has a non-zero value by adding one. */
33404 return mask + 1;
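/* [Editorial note] Worked example of the mask reconstruction above
   for V4SFmode (nelt == 4, so element i contributes 2 bits at
   position i * 2): the selector (1 0 3 2) encodes as
   1 | (0 << 2) | (3 << 4) | (2 << 6) = 0xB1, and the function returns
   0xB1 + 1 so that 0 remains reserved for "no match". */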
33407 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
33408 the expansion functions to turn the parallel back into a mask.
33409 The return value is 0 for no match and the imm8+1 for a match. */
33412 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
33414 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
33416 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
33418 if (XVECLEN (par, 0) != (int) nelt)
33421 /* Validate that all of the elements are constants, and not totally
33422 out of range. Copy the data into an integral array to make the
33423 subsequent checks easier. */
33424 for (i = 0; i < nelt; ++i)
33426 rtx er = XVECEXP (par, 0, i);
33427 unsigned HOST_WIDE_INT ei;
33429 if (!CONST_INT_P (er))
33432 if (ei >= 2 * nelt)
33437 /* Validate that the halves of the permute are halves. */
33438 for (i = 0; i < nelt2 - 1; ++i)
33439 if (ipar[i] + 1 != ipar[i + 1])
33441 for (i = nelt2; i < nelt - 1; ++i)
33442 if (ipar[i] + 1 != ipar[i + 1])
33445 /* Reconstruct the mask. */
33446 for (i = 0; i < 2; ++i)
33448 unsigned e = ipar[i * nelt2];
33452 mask |= e << (i * 4);
33455 /* Make sure success has a non-zero value by adding one. */
33456 return mask + 1;
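/* [Editorial note] Worked example for V4DFmode (nelt == 4, nelt2 ==
   2): the lane-swapping selector (2 3 0 1) has half-leaders 2 and 0,
   which divide by nelt2 to lane numbers 1 and 0, giving mask
   1 | (0 << 4) = 0x01, the vperm2f128 immediate that exchanges the
   two 128-bit halves; the function returns 0x02 after the +1
   adjustment. */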
33459 /* Store OPERAND to the memory after reload is completed. This means
33460 that we can't easily use assign_stack_local. */
33462 ix86_force_to_memory (enum machine_mode mode, rtx operand)
33466 gcc_assert (reload_completed);
33467 if (ix86_using_red_zone ())
33469 result = gen_rtx_MEM (mode,
33470 gen_rtx_PLUS (Pmode,
33472 GEN_INT (-RED_ZONE_SIZE)));
33473 emit_move_insn (result, operand);
33475 else if (TARGET_64BIT)
33481 operand = gen_lowpart (DImode, operand);
33485 gen_rtx_SET (VOIDmode,
33486 gen_rtx_MEM (DImode,
33487 gen_rtx_PRE_DEC (DImode,
33488 stack_pointer_rtx)),
33492 gcc_unreachable ();
33494 result = gen_rtx_MEM (mode, stack_pointer_rtx);
33503 split_double_mode (mode, &operand, 1, operands, operands + 1);
33505 gen_rtx_SET (VOIDmode,
33506 gen_rtx_MEM (SImode,
33507 gen_rtx_PRE_DEC (Pmode,
33508 stack_pointer_rtx)),
33511 gen_rtx_SET (VOIDmode,
33512 gen_rtx_MEM (SImode,
33513 gen_rtx_PRE_DEC (Pmode,
33514 stack_pointer_rtx)),
33519 /* Store HImodes as SImodes. */
33520 operand = gen_lowpart (SImode, operand);
33524 gen_rtx_SET (VOIDmode,
33525 gen_rtx_MEM (GET_MODE (operand),
33526 gen_rtx_PRE_DEC (SImode,
33527 stack_pointer_rtx)),
33531 gcc_unreachable ();
33533 result = gen_rtx_MEM (mode, stack_pointer_rtx);
33538 /* Free operand from the memory. */
33540 ix86_free_from_memory (enum machine_mode mode)
33542 if (!ix86_using_red_zone ())
33546 if (mode == DImode || TARGET_64BIT)
33550 /* Use LEA to deallocate stack space. In peephole2 it will be converted
33551 to a pop or add instruction if registers are available. */
33552 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
33553 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
33558 /* Return a register priority for hard reg REGNO. */
33560 ix86_register_priority (int hard_regno)
33562 /* ebp and r13 as the base always want a displacement, r12 as the
33563 base always wants an index. So discourage their usage in an
33564 address. */
33565 if (hard_regno == R12_REG || hard_regno == R13_REG)
33567 if (hard_regno == BP_REG)
33569 /* New x86-64 int registers result in bigger code size. Discourage
33570 them. */
33571 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
33573 /* New x86-64 SSE registers result in bigger code size. Discourage
33574 them. */
33575 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
33577 /* Usage of AX register results in smaller code. Prefer it. */
33578 if (hard_regno == 0)
33583 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
33585 Put float CONST_DOUBLE in the constant pool instead of fp regs.
33586 QImode must go into class Q_REGS.
33587 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
33588 movdf to do mem-to-mem moves through integer regs. */
33591 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
33593 enum machine_mode mode = GET_MODE (x);
33595 /* We're only allowed to return a subclass of CLASS. Many of the
33596 following checks fail for NO_REGS, so eliminate that early. */
33597 if (regclass == NO_REGS)
33600 /* All classes can load zeros. */
33601 if (x == CONST0_RTX (mode))
33604 /* Force constants into memory if we are loading a (nonzero) constant into
33605 an MMX or SSE register. This is because there are no MMX/SSE instructions
33606 to load from a constant. */
33608 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
33611 /* Prefer SSE regs only, if we can use them for math. */
33612 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
33613 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
33615 /* Floating-point constants need more complex checks. */
33616 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
33618 /* General regs can load everything. */
33619 if (reg_class_subset_p (regclass, GENERAL_REGS))
33622 /* Floats can load 0 and 1 plus some others. Note that we eliminated
33623 zero above. We only want to wind up preferring 80387 registers if
33624 we plan on doing computation with them. */
33626 && standard_80387_constant_p (x) > 0)
33628 /* Limit class to non-sse. */
33629 if (regclass == FLOAT_SSE_REGS)
33631 if (regclass == FP_TOP_SSE_REGS)
33633 if (regclass == FP_SECOND_SSE_REGS)
33634 return FP_SECOND_REG;
33635 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
33642 /* Generally when we see PLUS here, it's the function invariant
33643 (plus soft-fp const_int), which can only be computed into general
33644 regs. */
33645 if (GET_CODE (x) == PLUS)
33646 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
33648 /* QImode constants are easy to load, but non-constant QImode data
33649 must go into Q_REGS. */
33650 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
33652 if (reg_class_subset_p (regclass, Q_REGS))
33654 if (reg_class_subset_p (Q_REGS, regclass))
33662 /* Discourage putting floating-point values in SSE registers unless
33663 SSE math is being used, and likewise for the 387 registers. */
33665 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
33667 enum machine_mode mode = GET_MODE (x);
33669 /* Restrict the output reload class to the register bank that we are doing
33670 math on. If we would like not to return a subset of CLASS, reject this
33671 alternative: if reload cannot do this, it will still use its choice. */
33673 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
33674 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
33676 if (X87_FLOAT_MODE_P (mode))
33678 if (regclass == FP_TOP_SSE_REGS)
33680 else if (regclass == FP_SECOND_SSE_REGS)
33681 return FP_SECOND_REG;
33683 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
33690 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
33691 enum machine_mode mode, secondary_reload_info *sri)
33693 /* Double-word spills from general registers to non-offsettable memory
33694 references (zero-extended addresses) require special handling. */
33697 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
33698 && rclass == GENERAL_REGS
33699 && !offsettable_memref_p (x))
33702 ? CODE_FOR_reload_noff_load
33703 : CODE_FOR_reload_noff_store);
33704 /* Add the cost of moving the address to a temporary register. */
33705 sri->extra_cost = 1;
33710 /* QImode spills from non-QI registers require
33711 intermediate register on 32bit targets. */
33713 && !in_p && mode == QImode
33714 && (rclass == GENERAL_REGS
33715 || rclass == LEGACY_REGS
33716 || rclass == NON_Q_REGS
33719 || rclass == INDEX_REGS))
33728 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
33729 regno = true_regnum (x);
33731 /* Return Q_REGS if the operand is in memory. */
33736 /* This condition handles corner case where an expression involving
33737 pointers gets vectorized. We're trying to use the address of a
33738 stack slot as a vector initializer.
33740 (set (reg:V2DI 74 [ vect_cst_.2 ])
33741 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
33743 Eventually frame gets turned into sp+offset like this:
33745 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33746 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33747 (const_int 392 [0x188]))))
33749 That later gets turned into:
33751 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33752 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33753 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
33755 We'll have the following reload recorded:
33757 Reload 0: reload_in (DI) =
33758 (plus:DI (reg/f:DI 7 sp)
33759 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
33760 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33761 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
33762 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
33763 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33764 reload_reg_rtx: (reg:V2DI 22 xmm1)
33766 Which isn't going to work since SSE instructions can't handle scalar
33767 additions. Returning GENERAL_REGS forces the addition into an integer
33768 register, and reload can handle subsequent reloads without problems. */
33770 if (in_p && GET_CODE (x) == PLUS
33771 && SSE_CLASS_P (rclass)
33772 && SCALAR_INT_MODE_P (mode))
33773 return GENERAL_REGS;
33778 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
33781 ix86_class_likely_spilled_p (reg_class_t rclass)
33792 case SSE_FIRST_REG:
33794 case FP_SECOND_REG:
33804 /* If we are copying between general and FP registers, we need a memory
33805 location. The same is true for SSE and MMX registers.
33807 To optimize register_move_cost performance, allow inline variant.
33809 The macro can't work reliably when one of the CLASSES is class containing
33810 registers from multiple units (SSE, MMX, integer). We avoid this by never
33811 combining those units in single alternative in the machine description.
33812 Ensure that this constraint holds to avoid unexpected surprises.
33814 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
33815 enforce these sanity checks. */
33818 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
33819 enum machine_mode mode, int strict)
33821 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
33822 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
33823 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
33824 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
33825 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
33826 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
33828 gcc_assert (!strict || lra_in_progress);
33832 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
33835 /* ??? This is a lie. We do have moves between mmx/general, and for
33836 mmx/sse2. But by saying we need secondary memory we discourage the
33837 register allocator from using the mmx registers unless needed. */
33838 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
33841 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
33843 /* SSE1 doesn't have any direct moves from other classes. */
33847 /* If the target says that inter-unit moves are more expensive
33848 than moving through memory, then don't generate them. */
33849 if (!TARGET_INTER_UNIT_MOVES)
33852 /* Between SSE and general, we have moves no larger than word size. */
33853 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
33861 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
33862 enum machine_mode mode, int strict)
33864 return inline_secondary_memory_needed (class1, class2, mode, strict);
33867 /* Implement the TARGET_CLASS_MAX_NREGS hook.
33869 On the 80386, this is the size of MODE in words,
33870 except in the FP regs, where a single reg is always enough. */
33872 static unsigned char
33873 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
33875 if (MAYBE_INTEGER_CLASS_P (rclass))
33877 if (mode == XFmode)
33878 return (TARGET_64BIT ? 2 : 3);
33879 else if (mode == XCmode)
33880 return (TARGET_64BIT ? 4 : 6);
33882 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
33886 if (COMPLEX_MODE_P (mode))
33893 /* Return true if the registers in CLASS cannot represent the change from
33894 modes FROM to TO. */
33897 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
33898 enum reg_class regclass)
33903 /* x87 registers can't do subreg at all, as all values are reformatted
33904 to extended precision. */
33905 if (MAYBE_FLOAT_CLASS_P (regclass))
33908 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
33910 /* Vector registers do not support QI or HImode loads. If we don't
33911 disallow a change to these modes, reload will assume it's ok to
33912 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
33913 the vec_dupv4hi pattern. */
33914 if (GET_MODE_SIZE (from) < 4)
33917 /* Vector registers do not support subreg with nonzero offsets, which
33918 are otherwise valid for integer registers. Since we can't see
33919 whether we have a nonzero offset from here, prohibit all
33920 nonparadoxical subregs changing size. */
33921 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
33928 /* Return the cost of moving data of mode M between a
33929 register and memory. A value of 2 is the default; this cost is
33930 relative to those in `REGISTER_MOVE_COST'.
33932 This function is used extensively by register_move_cost that is used to
33933 build tables at startup. Make it inline in this case.
33934 When IN is 2, return maximum of in and out move cost.
33936 If moving between registers and memory is more expensive than
33937 between two registers, you should define this macro to express the
33938 relative cost.
33940 Model also the increased moving costs of QImode registers in non
33941 Q_REGS classes. */
33944 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
33948 if (FLOAT_CLASS_P (regclass))
33966 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
33967 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
33969 if (SSE_CLASS_P (regclass))
33972 switch (GET_MODE_SIZE (mode))
33987 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
33988 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
33990 if (MMX_CLASS_P (regclass))
33993 switch (GET_MODE_SIZE (mode))
34005 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
34006 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
34008 switch (GET_MODE_SIZE (mode))
34011 if (Q_CLASS_P (regclass) || TARGET_64BIT)
34014 return ix86_cost->int_store[0];
34015 if (TARGET_PARTIAL_REG_DEPENDENCY
34016 && optimize_function_for_speed_p (cfun))
34017 cost = ix86_cost->movzbl_load;
34019 cost = ix86_cost->int_load[0];
34021 return MAX (cost, ix86_cost->int_store[0]);
34027 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
34029 return ix86_cost->movzbl_load;
34031 return ix86_cost->int_store[0] + 4;
34036 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
34037 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
34039 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
34040 if (mode == TFmode)
34043 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
34045 cost = ix86_cost->int_load[2];
34047 cost = ix86_cost->int_store[2];
34048 return (cost * (((int) GET_MODE_SIZE (mode)
34049 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
34054 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
34057 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
34061 /* Return the cost of moving data from a register in class CLASS1 to
34062 one in class CLASS2.
34064 It is not required that the cost always equal 2 when FROM is the same as TO;
34065 on some machines it is expensive to move between registers if they are not
34066 general registers. */
34069 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
34070 reg_class_t class2_i)
34072 enum reg_class class1 = (enum reg_class) class1_i;
34073 enum reg_class class2 = (enum reg_class) class2_i;
34075 /* In case we require secondary memory, compute cost of the store followed
34076 by load. In order to avoid bad register allocation choices, we need
34077 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
34079 if (inline_secondary_memory_needed (class1, class2, mode, 0))
34083 cost += inline_memory_move_cost (mode, class1, 2);
34084 cost += inline_memory_move_cost (mode, class2, 2);
34086 /* In case of copying from a general purpose register we may emit multiple
34087 stores followed by a single load, causing a memory size mismatch stall.
34088 Count this as an arbitrarily high cost of 20. */
34089 if (targetm.class_max_nregs (class1, mode)
34090 > targetm.class_max_nregs (class2, mode))
34093 /* In the case of FP/MMX moves, the registers actually overlap, and we
34094 have to switch modes in order to treat them differently. */
34095 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
34096 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
34102 /* Moves between SSE/MMX and integer unit are expensive. */
34103 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
34104 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
34106 /* ??? By keeping the returned value relatively high, we limit the number
34107 of moves between integer and MMX/SSE registers for all targets.
34108 Additionally, a high value prevents problems with x86_modes_tieable_p(),
34109 where integer modes in MMX/SSE registers are not tieable
34110 because of missing QImode and HImode moves to, from or between
34111 MMX/SSE registers. */
34112 return MAX (8, ix86_cost->mmxsse_to_integer);
34114 if (MAYBE_FLOAT_CLASS_P (class1))
34115 return ix86_cost->fp_move;
34116 if (MAYBE_SSE_CLASS_P (class1))
34117 return ix86_cost->sse_move;
34118 if (MAYBE_MMX_CLASS_P (class1))
34119 return ix86_cost->mmx_move;
34123 /* Return TRUE if hard register REGNO can hold a value of machine-mode
34127 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
34129 /* Flags and only flags can only hold CCmode values. */
34130 if (CC_REGNO_P (regno))
34131 return GET_MODE_CLASS (mode) == MODE_CC;
34132 if (GET_MODE_CLASS (mode) == MODE_CC
34133 || GET_MODE_CLASS (mode) == MODE_RANDOM
34134 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
34136 if (STACK_REGNO_P (regno))
34137 return VALID_FP_MODE_P (mode);
34138 if (SSE_REGNO_P (regno))
34140 /* We implement the move patterns for all vector modes into and
34141 out of SSE registers, even when no operation instructions
34142 are available. OImode move is available only when AVX is
34144 return ((TARGET_AVX && mode == OImode)
34145 || VALID_AVX256_REG_MODE (mode)
34146 || VALID_SSE_REG_MODE (mode)
34147 || VALID_SSE2_REG_MODE (mode)
34148 || VALID_MMX_REG_MODE (mode)
34149 || VALID_MMX_REG_MODE_3DNOW (mode));
34151 if (MMX_REGNO_P (regno))
34153 /* We implement the move patterns for 3DNOW modes even in MMX mode,
34154 so if the register is available at all, then we can move data of
34155 the given mode into or out of it. */
34156 return (VALID_MMX_REG_MODE (mode)
34157 || VALID_MMX_REG_MODE_3DNOW (mode));
34160 if (mode == QImode)
34162 /* Take care for QImode values - they can be in non-QI regs,
34163 but then they do cause partial register stalls. */
34164 if (TARGET_64BIT || QI_REGNO_P (regno))
34166 if (!TARGET_PARTIAL_REG_STALL)
34168 /* LRA checks if the hard register is OK for the given mode.
34169 QImode values can live in non-QI regs, so we allow all
34170 registers here. */
34171 if (lra_in_progress)
34172 return true;
34173 return !can_create_pseudo_p ();
34175 /* We handle both integers and floats in the general purpose registers. */
34176 else if (VALID_INT_MODE_P (mode))
34178 else if (VALID_FP_MODE_P (mode))
34180 else if (VALID_DFP_MODE_P (mode))
34182 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
34183 on to use that value in smaller contexts, this can easily force a
34184 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
34185 supporting DImode, allow it. */
34186 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
34192 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
34193 tieable integer mode. */
34196 ix86_tieable_integer_mode_p (enum machine_mode mode)
34205 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
34208 return TARGET_64BIT;
34215 /* Return true if MODE1 is accessible in a register that can hold MODE2
34216 without copying. That is, all register classes that can hold MODE2
34217 can also hold MODE1. */
34220 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
34222 if (mode1 == mode2)
34225 if (ix86_tieable_integer_mode_p (mode1)
34226 && ix86_tieable_integer_mode_p (mode2))
34229 /* MODE2 being XFmode implies fp stack or general regs, which means we
34230 can tie any smaller floating point modes to it. Note that we do not
34231 tie this with TFmode. */
34232 if (mode2 == XFmode)
34233 return mode1 == SFmode || mode1 == DFmode;
34235 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
34236 that we can tie it with SFmode. */
34237 if (mode2 == DFmode)
34238 return mode1 == SFmode;
34240 /* If MODE2 is only appropriate for an SSE register, then tie with
34241 any other mode acceptable to SSE registers. */
34242 if (GET_MODE_SIZE (mode2) == 32
34243 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
34244 return (GET_MODE_SIZE (mode1) == 32
34245 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
34246 if (GET_MODE_SIZE (mode2) == 16
34247 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
34248 return (GET_MODE_SIZE (mode1) == 16
34249 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
34251 /* If MODE2 is appropriate for an MMX register, then tie
34252 with any other mode acceptable to MMX registers. */
34253 if (GET_MODE_SIZE (mode2) == 8
34254 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
34255 return (GET_MODE_SIZE (mode1) == 8
34256 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
34261 /* Return the cost of moving between two registers of mode MODE. */
34264 ix86_set_reg_reg_cost (enum machine_mode mode)
34266 unsigned int units = UNITS_PER_WORD;
34268 switch (GET_MODE_CLASS (mode))
34274 units = GET_MODE_SIZE (CCmode);
34278 if ((TARGET_SSE && mode == TFmode)
34279 || (TARGET_80387 && mode == XFmode)
34280 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
34281 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
34282 units = GET_MODE_SIZE (mode);
34285 case MODE_COMPLEX_FLOAT:
34286 if ((TARGET_SSE && mode == TCmode)
34287 || (TARGET_80387 && mode == XCmode)
34288 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
34289 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
34290 units = GET_MODE_SIZE (mode);
34293 case MODE_VECTOR_INT:
34294 case MODE_VECTOR_FLOAT:
34295 if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode))
34296 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
34297 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
34298 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
34299 units = GET_MODE_SIZE (mode);
34302 /* Return the cost of moving between two registers of mode MODE,
34303 assuming that the move will be in pieces of at most UNITS bytes. */
34304 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
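/* [Editorial note] Worked example of the cost above: a DImode
   register move on a 32-bit target keeps units == UNITS_PER_WORD ==
   4, so the 8-byte value costs COSTS_N_INSNS (2), i.e. two word-sized
   moves; on a 64-bit target, or for DFmode when SSE2 (or the 80387)
   can hold it, units equals the mode size and the cost collapses to
   COSTS_N_INSNS (1). */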
34307 /* Compute a (partial) cost for rtx X. Return true if the complete
34308 cost has been computed, and false if subexpressions should be
34309 scanned. In either case, *TOTAL contains the cost result. */
34312 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
34315 enum rtx_code code = (enum rtx_code) code_i;
34316 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
34317 enum machine_mode mode = GET_MODE (x);
34318 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
34323 if (register_operand (SET_DEST (x), VOIDmode)
34324 && reg_or_0_operand (SET_SRC (x), VOIDmode))
34326 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
34335 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
34337 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
34339 else if (flag_pic && SYMBOLIC_CONST (x)
34340 && (!TARGET_64BIT
34341 || (GET_CODE (x) != LABEL_REF
34342 && (GET_CODE (x) != SYMBOL_REF
34343 || !SYMBOL_REF_LOCAL_P (x)))))
34350 if (mode == VOIDmode)
34355 switch (standard_80387_constant_p (x))
34360 default: /* Other constants */
34367 if (SSE_FLOAT_MODE_P (mode))
34370 switch (standard_sse_constant_p (x))
34374 case 1: /* 0: xor eliminates false dependency */
34377 default: /* -1: cmp contains false dependency */
34382 /* Fall back to (MEM (SYMBOL_REF)), since that's where
34383 it'll probably end up. Add a penalty for size. */
34384 *total = (COSTS_N_INSNS (1)
34385 + (flag_pic != 0 && !TARGET_64BIT)
34386 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
34390 /* Zero extension is often completely free on x86_64, so make
34391 it as cheap as possible. */
34392 if (TARGET_64BIT && mode == DImode
34393 && GET_MODE (XEXP (x, 0)) == SImode)
34395 else if (TARGET_ZERO_EXTEND_WITH_AND)
34396 *total = cost->add;
34398 *total = cost->movzx;
34402 *total = cost->movsx;
34406 if (SCALAR_INT_MODE_P (mode)
34407 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
34408 && CONST_INT_P (XEXP (x, 1)))
34410 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
34413 *total = cost->add;
34416 if ((value == 2 || value == 3)
34417 && cost->lea <= cost->shift_const)
34419 *total = cost->lea;
34429 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
34431 /* ??? Should be SSE vector operation cost. */
34432 /* At least for published AMD latencies, this really is the same
34433 as the latency for a simple fpu operation like fabs. */
34434 /* V*QImode is emulated with 1-11 insns. */
34435 if (mode == V16QImode || mode == V32QImode)
34438 if (TARGET_XOP && mode == V16QImode)
34440 /* For XOP we use vpshab, which requires a broadcast of the
34441 value to the variable shift insn. For constants this
34442 means a V16Q const in mem; even when we can perform the
34443 shift with one insn, set the cost to prefer paddb. */
34444 if (CONSTANT_P (XEXP (x, 1)))
34446 *total = (cost->fabs
34447 + rtx_cost (XEXP (x, 0), code, 0, speed)
34448 + (speed ? 2 : COSTS_N_BYTES (16)));
34453 else if (TARGET_SSSE3)
34455 *total = cost->fabs * count;
34458 *total = cost->fabs;
34460 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
34462 if (CONST_INT_P (XEXP (x, 1)))
34464 if (INTVAL (XEXP (x, 1)) > 32)
34465 *total = cost->shift_const + COSTS_N_INSNS (2);
34467 *total = cost->shift_const * 2;
34471 if (GET_CODE (XEXP (x, 1)) == AND)
34472 *total = cost->shift_var * 2;
34474 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
34479 if (CONST_INT_P (XEXP (x, 1)))
34480 *total = cost->shift_const;
34481 else if (GET_CODE (XEXP (x, 1)) == SUBREG
34482 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
34484 /* Return the cost after shift-and truncation. */
34485 *total = cost->shift_var;
34489 *total = cost->shift_var;
34497 gcc_assert (FLOAT_MODE_P (mode));
34498 gcc_assert (TARGET_FMA || TARGET_FMA4);
34500 /* ??? SSE scalar/vector cost should be used here. */
34501 /* ??? Bald assumption that fma has the same cost as fmul. */
34502 *total = cost->fmul;
34503 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
34505 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
34507 if (GET_CODE (sub) == NEG)
34508 sub = XEXP (sub, 0);
34509 *total += rtx_cost (sub, FMA, 0, speed);
34512 if (GET_CODE (sub) == NEG)
34513 sub = XEXP (sub, 0);
34514 *total += rtx_cost (sub, FMA, 2, speed);
34519 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
34521 /* ??? SSE scalar cost should be used here. */
34522 *total = cost->fmul;
34525 else if (X87_FLOAT_MODE_P (mode))
34527 *total = cost->fmul;
34530 else if (FLOAT_MODE_P (mode))
34532 /* ??? SSE vector cost should be used here. */
34533 *total = cost->fmul;
34536 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
34538 /* V*QImode is emulated with 7-13 insns. */
34539 if (mode == V16QImode || mode == V32QImode)
34542 if (TARGET_XOP && mode == V16QImode)
34544 else if (TARGET_SSSE3)
34546 *total = cost->fmul * 2 + cost->fabs * extra;
34548 /* V*DImode is emulated with 5-8 insns. */
34549 else if (mode == V2DImode || mode == V4DImode)
34551 if (TARGET_XOP && mode == V2DImode)
34552 *total = cost->fmul * 2 + cost->fabs * 3;
34554 *total = cost->fmul * 3 + cost->fabs * 5;
34556 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
34557 insns, including two PMULUDQ. */
34558 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
34559 *total = cost->fmul * 2 + cost->fabs * 5;
34561 *total = cost->fmul;
34566 rtx op0 = XEXP (x, 0);
34567 rtx op1 = XEXP (x, 1);
34569 if (CONST_INT_P (XEXP (x, 1)))
34571 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
34572 for (nbits = 0; value != 0; value &= value - 1)
34576 /* This is arbitrary. */
34579 /* Compute costs correctly for widening multiplication. */
34580 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
34581 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
34582 == GET_MODE_SIZE (mode))
34584 int is_mulwiden = 0;
34585 enum machine_mode inner_mode = GET_MODE (op0);
34587 if (GET_CODE (op0) == GET_CODE (op1))
34588 is_mulwiden = 1, op1 = XEXP (op1, 0);
34589 else if (CONST_INT_P (op1))
34591 if (GET_CODE (op0) == SIGN_EXTEND)
34592 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
34595 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
34599 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
34602 *total = (cost->mult_init[MODE_INDEX (mode)]
34603 + nbits * cost->mult_bit
34604 + rtx_cost (op0, outer_code, opno, speed)
34605 + rtx_cost (op1, outer_code, opno, speed));
34614 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
34615 /* ??? SSE cost should be used here. */
34616 *total = cost->fdiv;
34617 else if (X87_FLOAT_MODE_P (mode))
34618 *total = cost->fdiv;
34619 else if (FLOAT_MODE_P (mode))
34620 /* ??? SSE vector cost should be used here. */
34621 *total = cost->fdiv;
34623 *total = cost->divide[MODE_INDEX (mode)];
34627 if (GET_MODE_CLASS (mode) == MODE_INT
34628 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
34630 if (GET_CODE (XEXP (x, 0)) == PLUS
34631 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
34632 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
34633 && CONSTANT_P (XEXP (x, 1)))
34635 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
34636 if (val == 2 || val == 4 || val == 8)
34638 *total = cost->lea;
34639 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
34640 outer_code, opno, speed);
34641 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
34642 outer_code, opno, speed);
34643 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
34647 else if (GET_CODE (XEXP (x, 0)) == MULT
34648 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
34650 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
34651 if (val == 2 || val == 4 || val == 8)
34653 *total = cost->lea;
34654 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
34655 outer_code, opno, speed);
34656 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
34660 else if (GET_CODE (XEXP (x, 0)) == PLUS)
34662 *total = cost->lea;
34663 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
34664 outer_code, opno, speed);
34665 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
34666 outer_code, opno, speed);
34667 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
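/* [Editorial note] The three cases above recognize the address forms
   a single LEA can compute; e.g. (plus (plus (mult reg 4) reg) const)
   is "lea c(%base,%index,4), %dest", one instruction costing
   cost->lea instead of a shift and two adds. */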
34674 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
34676 /* ??? SSE cost should be used here. */
34677 *total = cost->fadd;
34680 else if (X87_FLOAT_MODE_P (mode))
34682 *total = cost->fadd;
34685 else if (FLOAT_MODE_P (mode))
34687 /* ??? SSE vector cost should be used here. */
34688 *total = cost->fadd;
34696 if (GET_MODE_CLASS (mode) == MODE_INT
34697 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
34699 *total = (cost->add * 2
34700 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
34701 << (GET_MODE (XEXP (x, 0)) != DImode))
34702 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
34703 << (GET_MODE (XEXP (x, 1)) != DImode)));
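/* E.g. a DImode addition on a 32-bit target is a two-instruction
   add-with-carry pair, roughly:

       addl %eax, %ecx
       adcl %edx, %ebx

   hence cost->add * 2; each operand's cost is doubled unless the operand
   is already DImode.  */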
34709 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
34711 /* ??? SSE cost should be used here. */
34712 *total = cost->fchs;
34715 else if (X87_FLOAT_MODE_P (mode))
34717 *total = cost->fchs;
34720 else if (FLOAT_MODE_P (mode))
34722 /* ??? SSE vector cost should be used here. */
34723 *total = cost->fchs;
34729 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
34731 /* ??? Should be SSE vector operation cost. */
34732 /* At least for published AMD latencies, this really is the same
34733 as the latency for a simple fpu operation like fabs. */
34734 *total = cost->fabs;
34736 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
34737 *total = cost->add * 2;
34739 *total = cost->add;
34743 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
34744 && XEXP (XEXP (x, 0), 1) == const1_rtx
34745 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
34746 && XEXP (x, 1) == const0_rtx)
34748 /* This kind of construct is implemented using test[bwl].
34749 Treat it as if we had an AND. */
34750 *total = (cost->add
34751 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
34752 + rtx_cost (const1_rtx, outer_code, opno, speed));
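/* E.g. (compare (zero_extract (reg:SI x) (const_int 1) (const_int 5))
   (const_int 0)) becomes a single bit-test instruction such as
   "testb $32, %al", so it is costed like an AND with a constant.  */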
34758 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
34763 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
34764 /* ??? SSE cost should be used here. */
34765 *total = cost->fabs;
34766 else if (X87_FLOAT_MODE_P (mode))
34767 *total = cost->fabs;
34768 else if (FLOAT_MODE_P (mode))
34769 /* ??? SSE vector cost should be used here. */
34770 *total = cost->fabs;
34774 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
34775 /* ??? SSE cost should be used here. */
34776 *total = cost->fsqrt;
34777 else if (X87_FLOAT_MODE_P (mode))
34778 *total = cost->fsqrt;
34779 else if (FLOAT_MODE_P (mode))
34780 /* ??? SSE vector cost should be used here. */
34781 *total = cost->fsqrt;
34785 if (XINT (x, 1) == UNSPEC_TP)
34792 case VEC_DUPLICATE:
34793 /* ??? Assume all of these vector manipulation patterns are
34794 recognizable. In which case they all pretty much have the same cost. */
34796 *total = cost->fabs;
34806 static int current_machopic_label_num;
34808 /* Given a symbol name and its associated stub, write out the
34809 definition of the stub. */
34812 machopic_output_stub (FILE *file, const char *symb, const char *stub)
34814 unsigned int length;
34815 char *binder_name, *symbol_name, lazy_ptr_name[32];
34816 int label = ++current_machopic_label_num;
34818 /* For 64-bit we shouldn't get here. */
34819 gcc_assert (!TARGET_64BIT);
34821 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
34822 symb = targetm.strip_name_encoding (symb);
34824 length = strlen (stub);
34825 binder_name = XALLOCAVEC (char, length + 32);
34826 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
34828 length = strlen (symb);
34829 symbol_name = XALLOCAVEC (char, length + 32);
34830 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
34832 sprintf (lazy_ptr_name, "L%d$lz", label);
34834 if (MACHOPIC_ATT_STUB)
34835 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
34836 else if (MACHOPIC_PURE)
34837 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
34839 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
34841 fprintf (file, "%s:\n", stub);
34842 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34844 if (MACHOPIC_ATT_STUB)
34846 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
34848 else if (MACHOPIC_PURE)
34851 /* 25-byte PIC stub using "CALL get_pc_thunk". */
34852 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
34853 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
34854 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
34855 label, lazy_ptr_name, label);
34856 fprintf (file, "\tjmp\t*%%ecx\n");
34859 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
34861 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
34862 it needs no stub-binding-helper. */
34863 if (MACHOPIC_ATT_STUB)
34866 fprintf (file, "%s:\n", binder_name);
34870 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
34871 fprintf (file, "\tpushl\t%%ecx\n");
34874 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
34876 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
34878 /* N.B. Keep the correspondence of these
34879 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
34880 old-pic/new-pic/non-pic stubs; altering this will break
34881 compatibility with existing dylibs. */
34884 /* 25-byte PIC stub using "CALL get_pc_thunk". */
34885 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
34888 /* 16-byte -mdynamic-no-pic stub. */
34889 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
34891 fprintf (file, "%s:\n", lazy_ptr_name);
34892 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34893 fprintf (file, ASM_LONG "%s\n", binder_name);
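/* For illustration only, the MACHOPIC_PURE path above emits a stub of
   roughly this shape for a symbol _foo (the thunk and binder names are
   placeholders, not the exact strings produced):

   _foo$stub:
           .indirect_symbol _foo
           call    <get_pc_thunk.cx>           # via output_set_got
   LPC$1:  movl    L1$lz-LPC$1(%ecx),%ecx
           jmp     *%ecx
   <binder>:
           lea     L1$lz-<binder>(%ecx),%ecx
           pushl   %ecx
           jmp     dyld_stub_binding_helper
   L1$lz:
           .indirect_symbol _foo
           .long   <binder>
*/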
34895 #endif /* TARGET_MACHO */
34897 /* Order the registers for the register allocator. */
34900 x86_order_regs_for_local_alloc (void)
34905 /* First allocate the local general purpose registers. */
34906 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
34907 if (GENERAL_REGNO_P (i) && call_used_regs[i])
34908 reg_alloc_order [pos++] = i;
34910 /* Global general purpose registers. */
34911 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
34912 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
34913 reg_alloc_order [pos++] = i;
34915 /* x87 registers come first in case we are doing FP math using them. */
34917 if (!TARGET_SSE_MATH)
34918 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
34919 reg_alloc_order [pos++] = i;
34921 /* SSE registers. */
34922 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
34923 reg_alloc_order [pos++] = i;
34924 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
34925 reg_alloc_order [pos++] = i;
34927 /* x87 registers. */
34928 if (TARGET_SSE_MATH)
34929 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
34930 reg_alloc_order [pos++] = i;
34932 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
34933 reg_alloc_order [pos++] = i;
34935 /* Initialize the rest of the array, as we do not allocate some registers at all. */
34937 while (pos < FIRST_PSEUDO_REGISTER)
34938 reg_alloc_order [pos++] = 0;
34941 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
34942 in struct attribute_spec.handler. */
34944 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
34946 int flags ATTRIBUTE_UNUSED,
34947 bool *no_add_attrs)
34949 if (TREE_CODE (*node) != FUNCTION_TYPE
34950 && TREE_CODE (*node) != METHOD_TYPE
34951 && TREE_CODE (*node) != FIELD_DECL
34952 && TREE_CODE (*node) != TYPE_DECL)
34954 warning (OPT_Wattributes, "%qE attribute only applies to functions",
34956 *no_add_attrs = true;
34961 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
34963 *no_add_attrs = true;
34966 if (is_attribute_p ("callee_pop_aggregate_return", name))
34970 cst = TREE_VALUE (args);
34971 if (TREE_CODE (cst) != INTEGER_CST)
34973 warning (OPT_Wattributes,
34974 "%qE attribute requires an integer constant argument",
34976 *no_add_attrs = true;
34978 else if (compare_tree_int (cst, 0) != 0
34979 && compare_tree_int (cst, 1) != 0)
34981 warning (OPT_Wattributes,
34982 "argument to %qE attribute is neither zero, nor one",
34984 *no_add_attrs = true;
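/* Example usage (32-bit only; the function name is hypothetical):

     struct big { int a[4]; };
     struct big get_big (void)
       __attribute__ ((callee_pop_aggregate_return (1)));

   The argument, which must be the integer constant 0 or 1, says whether
   the callee pops the hidden aggregate-return pointer argument.  */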
34993 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
34994 struct attribute_spec.handler. */
34996 ix86_handle_abi_attribute (tree *node, tree name,
34997 tree args ATTRIBUTE_UNUSED,
34998 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
35000 if (TREE_CODE (*node) != FUNCTION_TYPE
35001 && TREE_CODE (*node) != METHOD_TYPE
35002 && TREE_CODE (*node) != FIELD_DECL
35003 && TREE_CODE (*node) != TYPE_DECL)
35005 warning (OPT_Wattributes, "%qE attribute only applies to functions",
35007 *no_add_attrs = true;
35011 /* Can combine regparm with all attributes but fastcall. */
35012 if (is_attribute_p ("ms_abi", name))
35014 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
35016 error ("ms_abi and sysv_abi attributes are not compatible");
35021 else if (is_attribute_p ("sysv_abi", name))
35023 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
35025 error ("ms_abi and sysv_abi attributes are not compatible");
35034 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
35035 struct attribute_spec.handler. */
35037 ix86_handle_struct_attribute (tree *node, tree name,
35038 tree args ATTRIBUTE_UNUSED,
35039 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
35042 if (DECL_P (*node))
35044 if (TREE_CODE (*node) == TYPE_DECL)
35045 type = &TREE_TYPE (*node);
35050 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
35052 warning (OPT_Wattributes, "%qE attribute ignored",
35054 *no_add_attrs = true;
35057 else if ((is_attribute_p ("ms_struct", name)
35058 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
35059 || ((is_attribute_p ("gcc_struct", name)
35060 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
35062 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
35064 *no_add_attrs = true;
35071 ix86_handle_fndecl_attribute (tree *node, tree name,
35072 tree args ATTRIBUTE_UNUSED,
35073 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
35075 if (TREE_CODE (*node) != FUNCTION_DECL)
35077 warning (OPT_Wattributes, "%qE attribute only applies to functions",
35079 *no_add_attrs = true;
35085 ix86_ms_bitfield_layout_p (const_tree record_type)
35087 return ((TARGET_MS_BITFIELD_LAYOUT
35088 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
35089 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
35092 /* Returns an expression indicating where the this parameter is
35093 located on entry to the FUNCTION. */
35096 x86_this_parameter (tree function)
35098 tree type = TREE_TYPE (function);
35099 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
35104 const int *parm_regs;
35106 if (ix86_function_type_abi (type) == MS_ABI)
35107 parm_regs = x86_64_ms_abi_int_parameter_registers;
35109 parm_regs = x86_64_int_parameter_registers;
35110 return gen_rtx_REG (Pmode, parm_regs[aggr]);
35113 nregs = ix86_function_regparm (type, function);
35115 if (nregs > 0 && !stdarg_p (type))
35118 unsigned int ccvt = ix86_get_callcvt (type);
35120 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
35121 regno = aggr ? DX_REG : CX_REG;
35122 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
35126 return gen_rtx_MEM (SImode,
35127 plus_constant (Pmode, stack_pointer_rtx, 4));
35136 return gen_rtx_MEM (SImode,
35137 plus_constant (Pmode,
35138 stack_pointer_rtx, 4));
35141 return gen_rtx_REG (SImode, regno);
35144 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
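/* E.g. for a plain 32-bit cdecl method, THIS is the first stack argument
   and is found at 4(%esp) on entry; with fastcall it arrives in %ecx
   (%edx when a hidden aggregate-return pointer occupies %ecx); for the
   64-bit SysV ABI it is in %rdi, or %rsi after the hidden pointer.  */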
35148 /* Determine whether x86_output_mi_thunk can succeed. */
35151 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
35152 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
35153 HOST_WIDE_INT vcall_offset, const_tree function)
35155 /* 64-bit can handle anything. */
35159 /* For 32-bit, everything's fine if we have one free register. */
35160 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
35163 /* Need a free register for vcall_offset. */
35167 /* Need a free register for GOT references. */
35168 if (flag_pic && !targetm.binds_local_p (function))
35171 /* Otherwise ok. */
35175 /* Output the assembler code for a thunk function. THUNK_DECL is the
35176 declaration for the thunk function itself, FUNCTION is the decl for
35177 the target function. DELTA is an immediate constant offset to be
35178 added to THIS. If VCALL_OFFSET is nonzero, the word at
35179 *(*this + vcall_offset) should be added to THIS. */
35182 x86_output_mi_thunk (FILE *file,
35183 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
35184 HOST_WIDE_INT vcall_offset, tree function)
35186 rtx this_param = x86_this_parameter (function);
35187 rtx this_reg, tmp, fnaddr;
35188 unsigned int tmp_regno;
35191 tmp_regno = R10_REG;
35194 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
35195 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
35196 tmp_regno = AX_REG;
35197 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
35198 tmp_regno = DX_REG;
35200 tmp_regno = CX_REG;
35203 emit_note (NOTE_INSN_PROLOGUE_END);
35205 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
35206 pull it in now and let DELTA benefit. */
35207 if (REG_P (this_param))
35208 this_reg = this_param;
35209 else if (vcall_offset)
35211 /* Put the this parameter into %eax. */
35212 this_reg = gen_rtx_REG (Pmode, AX_REG);
35213 emit_move_insn (this_reg, this_param);
35216 this_reg = NULL_RTX;
35218 /* Adjust the this parameter by a fixed constant. */
35221 rtx delta_rtx = GEN_INT (delta);
35222 rtx delta_dst = this_reg ? this_reg : this_param;
35226 if (!x86_64_general_operand (delta_rtx, Pmode))
35228 tmp = gen_rtx_REG (Pmode, tmp_regno);
35229 emit_move_insn (tmp, delta_rtx);
35234 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
35237 /* Adjust the this parameter by a value stored in the vtable. */
35240 rtx vcall_addr, vcall_mem, this_mem;
35242 tmp = gen_rtx_REG (Pmode, tmp_regno);
35244 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
35245 if (Pmode != ptr_mode)
35246 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
35247 emit_move_insn (tmp, this_mem);
35249 /* Adjust the this parameter. */
35250 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
35252 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
35254 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
35255 emit_move_insn (tmp2, GEN_INT (vcall_offset));
35256 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
35259 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
35260 if (Pmode != ptr_mode)
35261 emit_insn (gen_addsi_1_zext (this_reg,
35262 gen_rtx_REG (ptr_mode,
35266 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
35269 /* If necessary, drop THIS back to its stack slot. */
35270 if (this_reg && this_reg != this_param)
35271 emit_move_insn (this_param, this_reg);
35273 fnaddr = XEXP (DECL_RTL (function), 0);
35276 if (!flag_pic || targetm.binds_local_p (function)
35277 || cfun->machine->call_abi == MS_ABI)
35281 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
35282 tmp = gen_rtx_CONST (Pmode, tmp);
35283 fnaddr = gen_rtx_MEM (Pmode, tmp);
35288 if (!flag_pic || targetm.binds_local_p (function))
35291 else if (TARGET_MACHO)
35293 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
35294 fnaddr = XEXP (fnaddr, 0);
35296 #endif /* TARGET_MACHO */
35299 tmp = gen_rtx_REG (Pmode, CX_REG);
35300 output_set_got (tmp, NULL_RTX);
35302 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
35303 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
35304 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
35308 /* Our sibling call patterns do not allow memories, because we have no
35309 predicate that can distinguish between frame and non-frame memory.
35310 For our purposes here, we can get away with (ab)using a jump pattern,
35311 because we're going to do no optimization. */
35312 if (MEM_P (fnaddr))
35313 emit_jump_insn (gen_indirect_jump (fnaddr));
35316 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
35317 fnaddr = legitimize_pic_address (fnaddr,
35318 gen_rtx_REG (Pmode, tmp_regno));
35320 if (!sibcall_insn_operand (fnaddr, word_mode))
35322 tmp = gen_rtx_REG (word_mode, tmp_regno);
35323 if (GET_MODE (fnaddr) != word_mode)
35324 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
35325 emit_move_insn (tmp, fnaddr);
35329 tmp = gen_rtx_MEM (QImode, fnaddr);
35330 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
35331 tmp = emit_call_insn (tmp);
35332 SIBLING_CALL_P (tmp) = 1;
35336 /* Emit just enough of rest_of_compilation to get the insns emitted.
35337 Note that use_thunk calls assemble_start_function et al. */
35338 tmp = get_insns ();
35339 shorten_branches (tmp);
35340 final_start_function (tmp, file, 1);
35341 final (tmp, file, 1);
35342 final_end_function ();
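/* For illustration, a typical 32-bit thunk with DELTA == 8, no
   VCALL_OFFSET and THIS on the stack reduces to just:

       addl $8, 4(%esp)
       jmp  <target>

   When VCALL_OFFSET is nonzero, the word loaded from
   *(*this + vcall_offset) is added to THIS before the jump.  */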
35346 x86_file_start (void)
35348 default_file_start ();
35350 darwin_file_start ();
35352 if (X86_FILE_START_VERSION_DIRECTIVE)
35353 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
35354 if (X86_FILE_START_FLTUSED)
35355 fputs ("\t.global\t__fltused\n", asm_out_file);
35356 if (ix86_asm_dialect == ASM_INTEL)
35357 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
35361 x86_field_alignment (tree field, int computed)
35363 enum machine_mode mode;
35364 tree type = TREE_TYPE (field);
35366 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
35368 mode = TYPE_MODE (strip_array_types (type));
35369 if (mode == DFmode || mode == DCmode
35370 || GET_MODE_CLASS (mode) == MODE_INT
35371 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
35372 return MIN (32, computed);
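/* Example: in 32-bit mode without -malign-double,

     struct s { char c; double d; };

   places D at offset 4, because the natural 8-byte alignment of DFmode
   fields is capped at 32 bits here.  */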
35376 /* Output assembler code to FILE to increment profiler label # LABELNO
35377 for profiling a function entry. */
35379 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
35381 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
35386 #ifndef NO_PROFILE_COUNTERS
35387 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
35390 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
35391 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
35393 fprintf (file, "\tcall\t%s\n", mcount_name);
35397 #ifndef NO_PROFILE_COUNTERS
35398 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
35401 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
35405 #ifndef NO_PROFILE_COUNTERS
35406 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
35409 fprintf (file, "\tcall\t%s\n", mcount_name);
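/* The emitted sequences look roughly like (counter setup elided):

     64-bit SysV PIC:  call *mcount@GOTPCREL(%rip)
     32-bit PIC:       call *mcount@GOT(%ebx)
     otherwise:        call mcount

   where the actual symbol name comes from MCOUNT_NAME, or from
   MCOUNT_NAME_BEFORE_PROLOGUE under -mfentry.  */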
35413 /* We don't have exact information about the insn sizes, but we may assume
35414 quite safely that we are informed about all 1 byte insns and memory
35415 address sizes. This is enough to eliminate unnecessary padding in the vast majority of cases. */
35419 min_insn_size (rtx insn)
35423 if (!INSN_P (insn) || !active_insn_p (insn))
35426 /* Discard alignments we've emitted, and jump instructions. */
35427 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
35428 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
35430 if (JUMP_TABLE_DATA_P (insn))
35433 /* Important case - calls are always 5 bytes.
35434 It is common to have many calls in a row. */
35436 && symbolic_reference_mentioned_p (PATTERN (insn))
35437 && !SIBLING_CALL_P (insn))
35439 len = get_attr_length (insn);
35443 /* For normal instructions we rely on get_attr_length being exact,
35444 with a few exceptions. */
35445 if (!JUMP_P (insn))
35447 enum attr_type type = get_attr_type (insn);
35452 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
35453 || asm_noperands (PATTERN (insn)) >= 0)
35460 /* Otherwise trust get_attr_length. */
35464 l = get_attr_length_address (insn);
35465 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
35474 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35476 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte window. */
35480 ix86_avoid_jump_mispredicts (void)
35482 rtx insn, start = get_insns ();
35483 int nbytes = 0, njumps = 0;
35486 /* Look for all minimal intervals of instructions containing 4 jumps.
35487 The intervals are bounded by START and INSN. NBYTES is the total
35488 size of instructions in the interval including INSN and not including
35489 START. When the NBYTES is smaller than 16 bytes, it is possible
35490 that the end of START and INSN ends up in the same 16byte page.
35492 The smallest offset in the page INSN can start is the case where START
35493 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
35494 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
35496 for (insn = start; insn; insn = NEXT_INSN (insn))
35500 if (LABEL_P (insn))
35502 int align = label_to_alignment (insn);
35503 int max_skip = label_to_max_skip (insn);
35507 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
35508 already in the current 16 byte page, because otherwise
35509 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
35510 bytes to reach 16 byte boundary. */
35512 || (align <= 3 && max_skip != (1 << align) - 1))
35515 fprintf (dump_file, "Label %i with max_skip %i\n",
35516 INSN_UID (insn), max_skip);
35519 while (nbytes + max_skip >= 16)
35521 start = NEXT_INSN (start);
35522 if ((JUMP_P (start)
35523 && GET_CODE (PATTERN (start)) != ADDR_VEC
35524 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
35526 njumps--, isjump = 1;
35529 nbytes -= min_insn_size (start);
35535 min_size = min_insn_size (insn);
35536 nbytes += min_size;
35538 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
35539 INSN_UID (insn), min_size);
35541 && GET_CODE (PATTERN (insn)) != ADDR_VEC
35542 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
35550 start = NEXT_INSN (start);
35551 if ((JUMP_P (start)
35552 && GET_CODE (PATTERN (start)) != ADDR_VEC
35553 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
35555 njumps--, isjump = 1;
35558 nbytes -= min_insn_size (start);
35560 gcc_assert (njumps >= 0);
35562 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
35563 INSN_UID (start), INSN_UID (insn), nbytes);
35565 if (njumps == 3 && isjump && nbytes < 16)
35567 int padsize = 15 - nbytes + min_insn_size (insn);
35570 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
35571 INSN_UID (insn), padsize);
35572 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
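/* Worked example: with a window of NBYTES == 12 ending in a 2-byte jump,
   PADSIZE is 15 - 12 + 2 = 5, which is enough to keep the fourth jump
   out of the current 16-byte window.  */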
35578 /* AMD Athlon works faster
35579 when RET is not the destination of a conditional jump or directly preceded
35580 by another jump instruction. We avoid the penalty by inserting a NOP just
35581 before the RET instructions in such cases. */
35583 ix86_pad_returns (void)
35588 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
35590 basic_block bb = e->src;
35591 rtx ret = BB_END (bb);
35593 bool replace = false;
35595 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
35596 || optimize_bb_for_size_p (bb))
35598 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
35599 if (active_insn_p (prev) || LABEL_P (prev))
35601 if (prev && LABEL_P (prev))
35606 FOR_EACH_EDGE (e, ei, bb->preds)
35607 if (EDGE_FREQUENCY (e) && e->src->index >= 0
35608 && !(e->flags & EDGE_FALLTHRU))
35613 prev = prev_active_insn (ret);
35615 && ((JUMP_P (prev) && any_condjump_p (prev))
35618 /* Empty functions get branch mispredict even when
35619 the jump destination is not visible to us. */
35620 if (!prev && !optimize_function_for_size_p (cfun))
35625 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
35631 /* Count the minimum number of instructions in BB. Return 4 if the
35632 number of instructions >= 4. */
35635 ix86_count_insn_bb (basic_block bb)
35638 int insn_count = 0;
35640 /* Count number of instructions in this block. Return 4 if the number
35641 of instructions >= 4. */
35642 FOR_BB_INSNS (bb, insn)
35644 /* This only happens in exit blocks. */
35646 && ANY_RETURN_P (PATTERN (insn)))
35649 if (NONDEBUG_INSN_P (insn)
35650 && GET_CODE (PATTERN (insn)) != USE
35651 && GET_CODE (PATTERN (insn)) != CLOBBER)
35654 if (insn_count >= 4)
35663 /* Count the minimum number of instructions in code path in BB.
35664 Return 4 if the number of instructions >= 4. */
35667 ix86_count_insn (basic_block bb)
35671 int min_prev_count;
35673 /* Only bother counting instructions along paths with no
35674 more than 2 basic blocks between entry and exit. Given
35675 that BB has an edge to exit, determine if a predecessor
35676 of BB has an edge from entry. If so, compute the number
35677 of instructions in the predecessor block. If there
35678 happen to be multiple such blocks, compute the minimum. */
35679 min_prev_count = 4;
35680 FOR_EACH_EDGE (e, ei, bb->preds)
35683 edge_iterator prev_ei;
35685 if (e->src == ENTRY_BLOCK_PTR)
35687 min_prev_count = 0;
35690 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
35692 if (prev_e->src == ENTRY_BLOCK_PTR)
35694 int count = ix86_count_insn_bb (e->src);
35695 if (count < min_prev_count)
35696 min_prev_count = count;
35702 if (min_prev_count < 4)
35703 min_prev_count += ix86_count_insn_bb (bb);
35705 return min_prev_count;
35708 /* Pad short function to 4 instructions. */
35711 ix86_pad_short_function (void)
35716 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
35718 rtx ret = BB_END (e->src);
35719 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
35721 int insn_count = ix86_count_insn (e->src);
35723 /* Pad short function. */
35724 if (insn_count < 4)
35728 /* Find epilogue. */
35731 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
35732 insn = PREV_INSN (insn);
35737 /* Two NOPs count as one instruction. */
35738 insn_count = 2 * (4 - insn_count);
35739 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
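/* Worked example: a function body with 2 counted instructions gets
   insn_count = 2 * (4 - 2) = 4, i.e. four one-byte NOPs (counted as two
   instructions, per the comment above) emitted just before the
   epilogue.  */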
35745 /* Implement machine specific optimizations. We implement padding of returns
35746 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
35750 /* We are freeing block_for_insn in the toplev to keep compatibility
35751 with old MDEP_REORGS that are not CFG based. Recompute it now. */
35752 compute_bb_for_insn ();
35754 if (optimize && optimize_function_for_speed_p (cfun))
35756 if (TARGET_PAD_SHORT_FUNCTION)
35757 ix86_pad_short_function ();
35758 else if (TARGET_PAD_RETURNS)
35759 ix86_pad_returns ();
35760 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35761 if (TARGET_FOUR_JUMP_LIMIT)
35762 ix86_avoid_jump_mispredicts ();
35767 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
35770 x86_extended_QIreg_mentioned_p (rtx insn)
35773 extract_insn_cached (insn);
35774 for (i = 0; i < recog_data.n_operands; i++)
35775 if (GENERAL_REG_P (recog_data.operand[i])
35776 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
35781 /* Return nonzero when P points to register encoded via REX prefix.
35782 Called via for_each_rtx. */
35784 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
35786 unsigned int regno;
35789 regno = REGNO (*p);
35790 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
35793 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
35796 x86_extended_reg_mentioned_p (rtx insn)
35798 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
35799 extended_reg_mentioned_1, NULL);
35802 /* If profitable, negate (without causing overflow) integer constant
35803 of mode MODE at location LOC. Return true in this case. */
35805 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
35809 if (!CONST_INT_P (*loc))
35815 /* DImode x86_64 constants must fit in 32 bits. */
35816 gcc_assert (x86_64_immediate_operand (*loc, mode));
35827 gcc_unreachable ();
35830 /* Avoid overflows. */
35831 if (mode_signbit_p (mode, *loc))
35834 val = INTVAL (*loc);
35836 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
35837 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
35838 if ((val < 0 && val != -128)
35841 *loc = GEN_INT (-val);
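/* E.g. (plus (reg) (const_int -4)) is output as "subl $4, %eax" rather
   than "addl $-4, %eax".  -128 is left untouched because it fits in a
   sign-extended 8-bit immediate while +128 would not.  */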
35848 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
35849 optabs would emit if we didn't have TFmode patterns. */
35852 x86_emit_floatuns (rtx operands[2])
35854 rtx neglab, donelab, i0, i1, f0, in, out;
35855 enum machine_mode mode, inmode;
35857 inmode = GET_MODE (operands[1]);
35858 gcc_assert (inmode == SImode || inmode == DImode);
35861 in = force_reg (inmode, operands[1]);
35862 mode = GET_MODE (out);
35863 neglab = gen_label_rtx ();
35864 donelab = gen_label_rtx ();
35865 f0 = gen_reg_rtx (mode);
35867 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
35869 expand_float (out, in, 0);
35871 emit_jump_insn (gen_jump (donelab));
35874 emit_label (neglab);
35876 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
35878 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
35880 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
35882 expand_float (f0, i0, 0);
35884 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
35886 emit_label (donelab);
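/* A C model of the sequence emitted above, assuming a 64-bit input (the
   SImode variant is analogous):

     double
     u64_to_double (unsigned long long x)
     {
       if ((long long) x >= 0)
         return (double) (long long) x;        /* plain signed convert */
       /* Halve, keeping the lost bit so rounding stays correct,
          convert, then double.  */
       unsigned long long half = (x >> 1) | (x & 1);
       double f = (double) (long long) half;
       return f + f;
     }
*/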
35889 /* AVX2 does support 32-byte integer vector operations,
35890 thus the longest vector we are faced with is V32QImode. */
35891 #define MAX_VECT_LEN 32
35893 struct expand_vec_perm_d
35895 rtx target, op0, op1;
35896 unsigned char perm[MAX_VECT_LEN];
35897 enum machine_mode vmode;
35898 unsigned char nelt;
35899 bool one_operand_p;
35903 static bool canonicalize_perm (struct expand_vec_perm_d *d);
35904 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
35905 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
35907 /* Get a vector mode of the same size as the original but with elements
35908 twice as wide. This is only guaranteed to apply to integral vectors. */
35910 static inline enum machine_mode
35911 get_mode_wider_vector (enum machine_mode o)
35913 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
35914 enum machine_mode n = GET_MODE_WIDER_MODE (o);
35915 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
35916 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
35920 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35921 with all elements equal to VAR. Return true if successful. */
35924 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
35925 rtx target, rtx val)
35948 /* First attempt to recognize VAL as-is. */
35949 dup = gen_rtx_VEC_DUPLICATE (mode, val);
35950 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
35951 if (recog_memoized (insn) < 0)
35954 /* If that fails, force VAL into a register. */
35957 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
35958 seq = get_insns ();
35961 emit_insn_before (seq, insn);
35963 ok = recog_memoized (insn) >= 0;
35972 if (TARGET_SSE || TARGET_3DNOW_A)
35976 val = gen_lowpart (SImode, val);
35977 x = gen_rtx_TRUNCATE (HImode, val);
35978 x = gen_rtx_VEC_DUPLICATE (mode, x);
35979 emit_insn (gen_rtx_SET (VOIDmode, target, x));
35992 struct expand_vec_perm_d dperm;
35996 memset (&dperm, 0, sizeof (dperm));
35997 dperm.target = target;
35998 dperm.vmode = mode;
35999 dperm.nelt = GET_MODE_NUNITS (mode);
36000 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
36001 dperm.one_operand_p = true;
36003 /* Extend to SImode using a paradoxical SUBREG. */
36004 tmp1 = gen_reg_rtx (SImode);
36005 emit_move_insn (tmp1, gen_lowpart (SImode, val));
36007 /* Insert the SImode value as low element of a V4SImode vector. */
36008 tmp2 = gen_lowpart (V4SImode, dperm.op0);
36009 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
36011 ok = (expand_vec_perm_1 (&dperm)
36012 || expand_vec_perm_broadcast_1 (&dperm));
36024 /* Replicate the value once into the next wider mode and recurse. */
36026 enum machine_mode smode, wsmode, wvmode;
36029 smode = GET_MODE_INNER (mode);
36030 wvmode = get_mode_wider_vector (mode);
36031 wsmode = GET_MODE_INNER (wvmode);
36033 val = convert_modes (wsmode, smode, val, true);
36034 x = expand_simple_binop (wsmode, ASHIFT, val,
36035 GEN_INT (GET_MODE_BITSIZE (smode)),
36036 NULL_RTX, 1, OPTAB_LIB_WIDEN);
36037 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
36039 x = gen_lowpart (wvmode, target);
36040 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
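/* E.g. a V8HImode broadcast of X first forms the SImode scalar
   (X << 16) | X and then broadcasts that value as V4SImode, halving the
   element count at each recursion step.  */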
36048 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
36049 rtx x = gen_reg_rtx (hvmode);
36051 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
36054 x = gen_rtx_VEC_CONCAT (mode, x, x);
36055 emit_insn (gen_rtx_SET (VOIDmode, target, x));
36064 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36065 whose ONE_VAR element is VAR, and other elements are zero. Return true
36069 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
36070 rtx target, rtx var, int one_var)
36072 enum machine_mode vsimode;
36075 bool use_vector_set = false;
36080 /* For SSE4.1, we normally use vector set. But if the second
36081 element is zero and inter-unit moves are OK, we use movq instead. */
36083 use_vector_set = (TARGET_64BIT
36085 && !(TARGET_INTER_UNIT_MOVES
36091 use_vector_set = TARGET_SSE4_1;
36094 use_vector_set = TARGET_SSE2;
36097 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
36104 use_vector_set = TARGET_AVX;
36107 /* Use ix86_expand_vector_set in 64bit mode only. */
36108 use_vector_set = TARGET_AVX && TARGET_64BIT;
36114 if (use_vector_set)
36116 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
36117 var = force_reg (GET_MODE_INNER (mode), var);
36118 ix86_expand_vector_set (mmx_ok, target, var, one_var);
36134 var = force_reg (GET_MODE_INNER (mode), var);
36135 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
36136 emit_insn (gen_rtx_SET (VOIDmode, target, x));
36141 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
36142 new_target = gen_reg_rtx (mode);
36144 new_target = target;
36145 var = force_reg (GET_MODE_INNER (mode), var);
36146 x = gen_rtx_VEC_DUPLICATE (mode, var);
36147 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
36148 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
36151 /* We need to shuffle the value to the correct position, so
36152 create a new pseudo to store the intermediate result. */
36154 /* With SSE2, we can use the integer shuffle insns. */
36155 if (mode != V4SFmode && TARGET_SSE2)
36157 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
36159 GEN_INT (one_var == 1 ? 0 : 1),
36160 GEN_INT (one_var == 2 ? 0 : 1),
36161 GEN_INT (one_var == 3 ? 0 : 1)));
36162 if (target != new_target)
36163 emit_move_insn (target, new_target);
36167 /* Otherwise convert the intermediate result to V4SFmode and
36168 use the SSE1 shuffle instructions. */
36169 if (mode != V4SFmode)
36171 tmp = gen_reg_rtx (V4SFmode);
36172 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
36177 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
36179 GEN_INT (one_var == 1 ? 0 : 1),
36180 GEN_INT (one_var == 2 ? 0+4 : 1+4),
36181 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
36183 if (mode != V4SFmode)
36184 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
36185 else if (tmp != target)
36186 emit_move_insn (target, tmp);
36188 else if (target != new_target)
36189 emit_move_insn (target, new_target);
36194 vsimode = V4SImode;
36200 vsimode = V2SImode;
36206 /* Zero extend the variable element to SImode and recurse. */
36207 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
36209 x = gen_reg_rtx (vsimode);
36210 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
36212 gcc_unreachable ();
36214 emit_move_insn (target, gen_lowpart (mode, x));
36222 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36223 consisting of the values in VALS. It is known that all elements
36224 except ONE_VAR are constants. Return true if successful. */
36227 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
36228 rtx target, rtx vals, int one_var)
36230 rtx var = XVECEXP (vals, 0, one_var);
36231 enum machine_mode wmode;
36234 const_vec = copy_rtx (vals);
36235 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
36236 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
36244 /* For the two element vectors, it's just as easy to use
36245 the general case. */
36249 /* Use ix86_expand_vector_set in 64bit mode only. */
36272 /* There's no way to set one QImode entry easily. Combine
36273 the variable value with its adjacent constant value, and
36274 promote to an HImode set. */
36275 x = XVECEXP (vals, 0, one_var ^ 1);
36278 var = convert_modes (HImode, QImode, var, true);
36279 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
36280 NULL_RTX, 1, OPTAB_LIB_WIDEN);
36281 x = GEN_INT (INTVAL (x) & 0xff);
36285 var = convert_modes (HImode, QImode, var, true);
36286 x = gen_int_mode (INTVAL (x) << 8, HImode);
36288 if (x != const0_rtx)
36289 var = expand_simple_binop (HImode, IOR, var, x, var,
36290 1, OPTAB_LIB_WIDEN);
36292 x = gen_reg_rtx (wmode);
36293 emit_move_insn (x, gen_lowpart (wmode, const_vec));
36294 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
36296 emit_move_insn (target, gen_lowpart (mode, x));
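/* Worked example: setting variable element 5 of a V16QImode vector
   merges it with the constant at element 4 into one HImode value (the
   variable byte shifted into the high half), which is then stored at
   HImode element 5 >> 1 == 2 of the V8HImode view.  */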
36303 emit_move_insn (target, const_vec);
36304 ix86_expand_vector_set (mmx_ok, target, var, one_var);
36308 /* A subroutine of ix86_expand_vector_init_general. Use vector
36309 concatenate to handle the most general case: all values variable,
36310 and none identical. */
36313 ix86_expand_vector_init_concat (enum machine_mode mode,
36314 rtx target, rtx *ops, int n)
36316 enum machine_mode cmode, hmode = VOIDmode;
36317 rtx first[8], second[4];
36357 gcc_unreachable ();
36360 if (!register_operand (ops[1], cmode))
36361 ops[1] = force_reg (cmode, ops[1]);
36362 if (!register_operand (ops[0], cmode))
36363 ops[0] = force_reg (cmode, ops[0]);
36364 emit_insn (gen_rtx_SET (VOIDmode, target,
36365 gen_rtx_VEC_CONCAT (mode, ops[0],
36385 gcc_unreachable ();
36401 gcc_unreachable ();
36406 /* FIXME: We process inputs backward to help RA. PR 36222. */
36409 for (; i > 0; i -= 2, j--)
36411 first[j] = gen_reg_rtx (cmode);
36412 v = gen_rtvec (2, ops[i - 1], ops[i]);
36413 ix86_expand_vector_init (false, first[j],
36414 gen_rtx_PARALLEL (cmode, v));
36420 gcc_assert (hmode != VOIDmode);
36421 for (i = j = 0; i < n; i += 2, j++)
36423 second[j] = gen_reg_rtx (hmode);
36424 ix86_expand_vector_init_concat (hmode, second [j],
36428 ix86_expand_vector_init_concat (mode, target, second, n);
36431 ix86_expand_vector_init_concat (mode, target, first, n);
36435 gcc_unreachable ();
36439 /* A subroutine of ix86_expand_vector_init_general. Use vector
36440 interleave to handle the most general case: all values variable,
36441 and none identical. */
36444 ix86_expand_vector_init_interleave (enum machine_mode mode,
36445 rtx target, rtx *ops, int n)
36447 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
36450 rtx (*gen_load_even) (rtx, rtx, rtx);
36451 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
36452 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
36457 gen_load_even = gen_vec_setv8hi;
36458 gen_interleave_first_low = gen_vec_interleave_lowv4si;
36459 gen_interleave_second_low = gen_vec_interleave_lowv2di;
36460 inner_mode = HImode;
36461 first_imode = V4SImode;
36462 second_imode = V2DImode;
36463 third_imode = VOIDmode;
36466 gen_load_even = gen_vec_setv16qi;
36467 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
36468 gen_interleave_second_low = gen_vec_interleave_lowv4si;
36469 inner_mode = QImode;
36470 first_imode = V8HImode;
36471 second_imode = V4SImode;
36472 third_imode = V2DImode;
36475 gcc_unreachable ();
36478 for (i = 0; i < n; i++)
36480 /* Extend the odd element to SImode using a paradoxical SUBREG. */
36481 op0 = gen_reg_rtx (SImode);
36482 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
36484 /* Insert the SImode value as low element of V4SImode vector. */
36485 op1 = gen_reg_rtx (V4SImode);
36486 op0 = gen_rtx_VEC_MERGE (V4SImode,
36487 gen_rtx_VEC_DUPLICATE (V4SImode,
36489 CONST0_RTX (V4SImode),
36491 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
36493 /* Cast the V4SImode vector back to a vector in the original mode. */
36494 op0 = gen_reg_rtx (mode);
36495 emit_move_insn (op0, gen_lowpart (mode, op1));
36497 /* Load even elements into the second position. */
36498 emit_insn (gen_load_even (op0,
36499 force_reg (inner_mode,
36503 /* Cast vector to FIRST_IMODE vector. */
36504 ops[i] = gen_reg_rtx (first_imode);
36505 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
36508 /* Interleave low FIRST_IMODE vectors. */
36509 for (i = j = 0; i < n; i += 2, j++)
36511 op0 = gen_reg_rtx (first_imode);
36512 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
36514 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
36515 ops[j] = gen_reg_rtx (second_imode);
36516 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
36519 /* Interleave low SECOND_IMODE vectors. */
36520 switch (second_imode)
36523 for (i = j = 0; i < n / 2; i += 2, j++)
36525 op0 = gen_reg_rtx (second_imode);
36526 emit_insn (gen_interleave_second_low (op0, ops[i],
36529 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
36531 ops[j] = gen_reg_rtx (third_imode);
36532 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
36534 second_imode = V2DImode;
36535 gen_interleave_second_low = gen_vec_interleave_lowv2di;
36539 op0 = gen_reg_rtx (second_imode);
36540 emit_insn (gen_interleave_second_low (op0, ops[0],
36543 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
36545 emit_insn (gen_rtx_SET (VOIDmode, target,
36546 gen_lowpart (mode, op0)));
36550 gcc_unreachable ();
36554 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
36555 all values variable, and none identical. */
36558 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
36559 rtx target, rtx vals)
36561 rtx ops[32], op0, op1;
36562 enum machine_mode half_mode = VOIDmode;
36569 if (!mmx_ok && !TARGET_SSE)
36581 n = GET_MODE_NUNITS (mode);
36582 for (i = 0; i < n; i++)
36583 ops[i] = XVECEXP (vals, 0, i);
36584 ix86_expand_vector_init_concat (mode, target, ops, n);
36588 half_mode = V16QImode;
36592 half_mode = V8HImode;
36596 n = GET_MODE_NUNITS (mode);
36597 for (i = 0; i < n; i++)
36598 ops[i] = XVECEXP (vals, 0, i);
36599 op0 = gen_reg_rtx (half_mode);
36600 op1 = gen_reg_rtx (half_mode);
36601 ix86_expand_vector_init_interleave (half_mode, op0, ops,
36603 ix86_expand_vector_init_interleave (half_mode, op1,
36604 &ops [n >> 1], n >> 2);
36605 emit_insn (gen_rtx_SET (VOIDmode, target,
36606 gen_rtx_VEC_CONCAT (mode, op0, op1)));
36610 if (!TARGET_SSE4_1)
36618 /* Don't use ix86_expand_vector_init_interleave if we can't
36619 move from GPR to SSE register directly. */
36620 if (!TARGET_INTER_UNIT_MOVES)
36623 n = GET_MODE_NUNITS (mode);
36624 for (i = 0; i < n; i++)
36625 ops[i] = XVECEXP (vals, 0, i);
36626 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
36634 gcc_unreachable ();
36638 int i, j, n_elts, n_words, n_elt_per_word;
36639 enum machine_mode inner_mode;
36640 rtx words[4], shift;
36642 inner_mode = GET_MODE_INNER (mode);
36643 n_elts = GET_MODE_NUNITS (mode);
36644 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
36645 n_elt_per_word = n_elts / n_words;
36646 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
36648 for (i = 0; i < n_words; ++i)
36650 rtx word = NULL_RTX;
36652 for (j = 0; j < n_elt_per_word; ++j)
36654 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
36655 elt = convert_modes (word_mode, inner_mode, elt, true);
36661 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
36662 word, 1, OPTAB_LIB_WIDEN);
36663 word = expand_simple_binop (word_mode, IOR, word, elt,
36664 word, 1, OPTAB_LIB_WIDEN);
36672 emit_move_insn (target, gen_lowpart (mode, words[0]));
36673 else if (n_words == 2)
36675 rtx tmp = gen_reg_rtx (mode);
36676 emit_clobber (tmp);
36677 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
36678 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
36679 emit_move_insn (target, tmp);
36681 else if (n_words == 4)
36683 rtx tmp = gen_reg_rtx (V4SImode);
36684 gcc_assert (word_mode == SImode);
36685 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
36686 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
36687 emit_move_insn (target, gen_lowpart (mode, tmp));
36690 gcc_unreachable ();
36694 /* Initialize vector TARGET via VALS. Suppress the use of MMX
36695 instructions unless MMX_OK is true. */
36698 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
36700 enum machine_mode mode = GET_MODE (target);
36701 enum machine_mode inner_mode = GET_MODE_INNER (mode);
36702 int n_elts = GET_MODE_NUNITS (mode);
36703 int n_var = 0, one_var = -1;
36704 bool all_same = true, all_const_zero = true;
36708 for (i = 0; i < n_elts; ++i)
36710 x = XVECEXP (vals, 0, i);
36711 if (!(CONST_INT_P (x)
36712 || GET_CODE (x) == CONST_DOUBLE
36713 || GET_CODE (x) == CONST_FIXED))
36714 n_var++, one_var = i;
36715 else if (x != CONST0_RTX (inner_mode))
36716 all_const_zero = false;
36717 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
36721 /* Constants are best loaded from the constant pool. */
36724 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
36728 /* If all values are identical, broadcast the value. */
36730 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
36731 XVECEXP (vals, 0, 0)))
36734 /* Values where only one field is non-constant are best loaded from
36735 the pool and overwritten via move later. */
36739 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
36740 XVECEXP (vals, 0, one_var),
36744 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
36748 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
36752 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
36754 enum machine_mode mode = GET_MODE (target);
36755 enum machine_mode inner_mode = GET_MODE_INNER (mode);
36756 enum machine_mode half_mode;
36757 bool use_vec_merge = false;
36759 static rtx (*gen_extract[6][2]) (rtx, rtx)
36761 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
36762 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
36763 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
36764 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
36765 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
36766 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
36768 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
36770 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
36771 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
36772 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
36773 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
36774 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
36775 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
36785 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
36786 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
36788 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
36790 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
36791 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
36797 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
36801 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
36802 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
36804 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
36806 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
36807 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
36814 /* For the two element vectors, we implement a VEC_CONCAT with
36815 the extraction of the other element. */
36817 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
36818 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
36821 op0 = val, op1 = tmp;
36823 op0 = tmp, op1 = val;
36825 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
36826 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
36831 use_vec_merge = TARGET_SSE4_1;
36838 use_vec_merge = true;
36842 /* tmp = target = A B C D */
36843 tmp = copy_to_reg (target);
36844 /* target = A A B B */
36845 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
36846 /* target = X A B B */
36847 ix86_expand_vector_set (false, target, val, 0);
36848 /* target = A X C D */
36849 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
36850 const1_rtx, const0_rtx,
36851 GEN_INT (2+4), GEN_INT (3+4)));
36855 /* tmp = target = A B C D */
36856 tmp = copy_to_reg (target);
36857 /* tmp = X B C D */
36858 ix86_expand_vector_set (false, tmp, val, 0);
36859 /* target = A B X D */
36860 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
36861 const0_rtx, const1_rtx,
36862 GEN_INT (0+4), GEN_INT (3+4)));
36866 /* tmp = target = A B C D */
36867 tmp = copy_to_reg (target);
36868 /* tmp = X B C D */
36869 ix86_expand_vector_set (false, tmp, val, 0);
36870 /* target = A B C X */
36871 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
36872 const0_rtx, const1_rtx,
36873 GEN_INT (2+4), GEN_INT (0+4)));
36877 gcc_unreachable ();
36882 use_vec_merge = TARGET_SSE4_1;
36886 /* Element 0 handled by vec_merge below. */
36889 use_vec_merge = true;
36895 /* With SSE2, use integer shuffles to swap element 0 and ELT,
36896 store into element 0, then shuffle them back. */
36900 order[0] = GEN_INT (elt);
36901 order[1] = const1_rtx;
36902 order[2] = const2_rtx;
36903 order[3] = GEN_INT (3);
36904 order[elt] = const0_rtx;
36906 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
36907 order[1], order[2], order[3]));
36909 ix86_expand_vector_set (false, target, val, 0);
36911 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
36912 order[1], order[2], order[3]));
36916 /* For SSE1, we have to reuse the V4SF code. */
36917 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
36918 gen_lowpart (SFmode, val), elt);
36923 use_vec_merge = TARGET_SSE2;
36926 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
36930 use_vec_merge = TARGET_SSE4_1;
36937 half_mode = V16QImode;
36943 half_mode = V8HImode;
36949 half_mode = V4SImode;
36955 half_mode = V2DImode;
36961 half_mode = V4SFmode;
36967 half_mode = V2DFmode;
36973 /* Compute offset. */
36977 gcc_assert (i <= 1);
36979 /* Extract the half. */
36980 tmp = gen_reg_rtx (half_mode);
36981 emit_insn (gen_extract[j][i] (tmp, target));
36983 /* Put val in tmp at elt. */
36984 ix86_expand_vector_set (false, tmp, val, elt);
36987 emit_insn (gen_insert[j][i] (target, target, tmp));
36996 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
36997 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
36998 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
37002 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
37004 emit_move_insn (mem, target);
37006 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
37007 emit_move_insn (tmp, val);
37009 emit_move_insn (target, mem);
37014 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
37016 enum machine_mode mode = GET_MODE (vec);
37017 enum machine_mode inner_mode = GET_MODE_INNER (mode);
37018 bool use_vec_extr = false;
37031 use_vec_extr = true;
37035 use_vec_extr = TARGET_SSE4_1;
37047 tmp = gen_reg_rtx (mode);
37048 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
37049 GEN_INT (elt), GEN_INT (elt),
37050 GEN_INT (elt+4), GEN_INT (elt+4)));
37054 tmp = gen_reg_rtx (mode);
37055 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
37059 gcc_unreachable ();
37062 use_vec_extr = true;
37067 use_vec_extr = TARGET_SSE4_1;
37081 tmp = gen_reg_rtx (mode);
37082 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
37083 GEN_INT (elt), GEN_INT (elt),
37084 GEN_INT (elt), GEN_INT (elt)));
37088 tmp = gen_reg_rtx (mode);
37089 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
37093 gcc_unreachable ();
37096 use_vec_extr = true;
37101 /* For SSE1, we have to reuse the V4SF code. */
37102 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
37103 gen_lowpart (V4SFmode, vec), elt);
37109 use_vec_extr = TARGET_SSE2;
37112 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
37116 use_vec_extr = TARGET_SSE4_1;
37122 tmp = gen_reg_rtx (V4SFmode);
37124 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
37126 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
37127 ix86_expand_vector_extract (false, target, tmp, elt & 3);
37135 tmp = gen_reg_rtx (V2DFmode);
37137 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
37139 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
37140 ix86_expand_vector_extract (false, target, tmp, elt & 1);
37148 tmp = gen_reg_rtx (V16QImode);
37150 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
37152 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
37153 ix86_expand_vector_extract (false, target, tmp, elt & 15);
37161 tmp = gen_reg_rtx (V8HImode);
37163 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
37165 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
37166 ix86_expand_vector_extract (false, target, tmp, elt & 7);
37174 tmp = gen_reg_rtx (V4SImode);
37176 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
37178 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
37179 ix86_expand_vector_extract (false, target, tmp, elt & 3);
37187 tmp = gen_reg_rtx (V2DImode);
37189 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
37191 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
37192 ix86_expand_vector_extract (false, target, tmp, elt & 1);
37198 /* ??? Could extract the appropriate HImode element and shift. */
37205 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
37206 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
37208 /* Let the rtl optimizers know about the zero extension performed. */
37209 if (inner_mode == QImode || inner_mode == HImode)
37211 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
37212 target = gen_lowpart (SImode, target);
37215 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
37219 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
37221 emit_move_insn (mem, vec);
37223 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
37224 emit_move_insn (target, tmp);
37228 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
37229 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
37230 The upper bits of DEST are undefined, though they shouldn't cause
37231 exceptions (some bits from src or all zeros are ok). */
37234 emit_reduc_half (rtx dest, rtx src, int i)
37237 switch (GET_MODE (src))
37241 tem = gen_sse_movhlps (dest, src, src);
37243 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
37244 GEN_INT (1 + 4), GEN_INT (1 + 4));
37247 tem = gen_vec_interleave_highv2df (dest, src, src);
37253 tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
37254 gen_lowpart (V1TImode, src),
37259 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
37261 tem = gen_avx_shufps256 (dest, src, src,
37262 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
37266 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
37268 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
37275 tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
37276 gen_lowpart (V4DImode, src),
37277 gen_lowpart (V4DImode, src),
37280 tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
37281 gen_lowpart (V2TImode, src),
37285 gcc_unreachable ();
37290 /* Expand a vector reduction. FN is the binary pattern to reduce;
37291 DEST is the destination; IN is the input vector. */
37294 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
37296 rtx half, dst, vec = in;
37297 enum machine_mode mode = GET_MODE (in);
37300 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
37302 && mode == V8HImode
37303 && fn == gen_uminv8hi3)
37305 emit_insn (gen_sse4_1_phminposuw (dest, in));
37309 for (i = GET_MODE_BITSIZE (mode);
37310 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
37313 half = gen_reg_rtx (mode);
37314 emit_reduc_half (half, vec, i);
37315 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
37318 dst = gen_reg_rtx (mode);
37319 emit_insn (fn (dst, half, vec));
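/* E.g. for a V4SFmode reduction, the first emit_reduc_half (i == 128)
   moves the high 64 bits down and FN folds four live elements into two;
   the second (i == 64) brings element 1 to the bottom and the final FN
   leaves the result in element 0 of DEST.  */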
37324 /* Target hook for scalar_mode_supported_p. */
37326 ix86_scalar_mode_supported_p (enum machine_mode mode)
37328 if (DECIMAL_FLOAT_MODE_P (mode))
37329 return default_decimal_float_supported_p ();
37330 else if (mode == TFmode)
37333 return default_scalar_mode_supported_p (mode);
37336 /* Implements target hook vector_mode_supported_p. */
37338 ix86_vector_mode_supported_p (enum machine_mode mode)
37340 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37342 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37344 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37346 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
37348 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
37353 /* Target hook for c_mode_for_suffix. */
37354 static enum machine_mode
37355 ix86_c_mode_for_suffix (char suffix)
37365 /* Worker function for TARGET_MD_ASM_CLOBBERS.
37367 We do this in the new i386 backend to maintain source compatibility
37368 with the old cc0-based compiler. */
37371 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
37372 tree inputs ATTRIBUTE_UNUSED,
37375 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
37377 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
37382 /* Implements target vector targetm.asm.encode_section_info. */
37384 static void ATTRIBUTE_UNUSED
37385 ix86_encode_section_info (tree decl, rtx rtl, int first)
37387 default_encode_section_info (decl, rtl, first);
37389 if (TREE_CODE (decl) == VAR_DECL
37390 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
37391 && ix86_in_large_data_p (decl))
37392 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
37395 /* Worker function for REVERSE_CONDITION. */
37398 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
37400 return (mode != CCFPmode && mode != CCFPUmode
37401 ? reverse_condition (code)
37402 : reverse_condition_maybe_unordered (code));
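/* Informal example: in CCmode reversing LT yields GE, but in CCFPmode a
   NaN operand must stay on the "true" side of the reversed test, so
   reversing LT yields UNGE (reverse_condition_maybe_unordered) rather
   than GE.  */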
37405 /* Output code to perform an x87 FP register move, from OPERANDS[1]
37409 output_387_reg_move (rtx insn, rtx *operands)
37411 if (REG_P (operands[0]))
37413 if (REG_P (operands[1])
37414 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
37416 if (REGNO (operands[0]) == FIRST_STACK_REG)
37417 return output_387_ffreep (operands, 0);
37418 return "fstp\t%y0";
37420 if (STACK_TOP_P (operands[0]))
37421 return "fld%Z1\t%y1";
37424 else if (MEM_P (operands[0]))
37426 gcc_assert (REG_P (operands[1]));
37427 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
37428 return "fstp%Z0\t%y0";
37431 /* There is no non-popping store to memory for XFmode.
37432 So if we need one, follow the store with a load. */
37433 if (GET_MODE (operands[0]) == XFmode)
37434 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
37436 return "fst%Z0\t%y0";
37443 /* Output code to perform a conditional jump to LABEL, if C2 flag in
37444 FP status register is set. */
37447 ix86_emit_fp_unordered_jump (rtx label)
37449 rtx reg = gen_reg_rtx (HImode);
37452 emit_insn (gen_x86_fnstsw_1 (reg));
37454 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
37456 emit_insn (gen_x86_sahf_1 (reg));
37458 temp = gen_rtx_REG (CCmode, FLAGS_REG);
37459 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
37463 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
37465 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
37466 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
37469 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
37470 gen_rtx_LABEL_REF (VOIDmode, label),
37472 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
37474 emit_jump_insn (temp);
37475 predict_jump (REG_BR_PROB_BASE * 10 / 100);
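/* Background (informal): fnstsw copies the x87 status word into %ax,
   placing C0/C2/C3 in bits 0, 2 and 6 of %ah.  "sahf" then moves %ah
   into CF/PF/ZF, turning C2 into PF; without sahf, the testqi against
   0x04 above probes the same C2 bit directly.  */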
37478 /* Output code to perform a log1p XFmode calculation. */
37480 void ix86_emit_i387_log1p (rtx op0, rtx op1)
37482 rtx label1 = gen_label_rtx ();
37483 rtx label2 = gen_label_rtx ();
37485 rtx tmp = gen_reg_rtx (XFmode);
37486 rtx tmp2 = gen_reg_rtx (XFmode);
37489 emit_insn (gen_absxf2 (tmp, op1));
37490 test = gen_rtx_GE (VOIDmode, tmp,
37491 CONST_DOUBLE_FROM_REAL_VALUE (
37492 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
37494 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
37496 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
37497 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
37498 emit_jump (label2);
37500 emit_label (label1);
37501 emit_move_insn (tmp, CONST1_RTX (XFmode));
37502 emit_insn (gen_addxf3 (tmp, op1, tmp));
37503 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
37504 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
37506 emit_label (label2);
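/* An informal C reference model of the split above, where fyl2xp1 and
   fyl2x stand in for the i387 instructions of those names (argument
   order here is illustrative only):

     long double log1p_ref (long double x)
     {
       if (fabsl (x) < 0.29289321881345247561810596348408353L)
	 return fyl2xp1 (M_LN2l, x);	 /* ln2 * log2 (1 + x) */
       return fyl2x (M_LN2l, 1.0L + x); /* ln2 * log2 (1 + x) */
     }

   The threshold is 1 - sqrt(2)/2; within that range fyl2xp1 keeps full
   precision for small x, where explicitly computing 1 + x would lose
   it.  */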
37509 /* Emit code for round calculation. */
37510 void ix86_emit_i387_round (rtx op0, rtx op1)
37512 enum machine_mode inmode = GET_MODE (op1);
37513 enum machine_mode outmode = GET_MODE (op0);
37514 rtx e1, e2, res, tmp, tmp1, half;
37515 rtx scratch = gen_reg_rtx (HImode);
37516 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
37517 rtx jump_label = gen_label_rtx ();
37519 rtx (*gen_abs) (rtx, rtx);
37520 rtx (*gen_neg) (rtx, rtx);
37525 gen_abs = gen_abssf2;
37528 gen_abs = gen_absdf2;
37531 gen_abs = gen_absxf2;
37534 gcc_unreachable ();
37540 gen_neg = gen_negsf2;
37543 gen_neg = gen_negdf2;
37546 gen_neg = gen_negxf2;
37549 gen_neg = gen_neghi2;
37552 gen_neg = gen_negsi2;
37555 gen_neg = gen_negdi2;
37558 gcc_unreachable ();
37561 e1 = gen_reg_rtx (inmode);
37562 e2 = gen_reg_rtx (inmode);
37563 res = gen_reg_rtx (outmode);
37565 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
37567 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
37569 /* scratch = fxam(op1) */
37570 emit_insn (gen_rtx_SET (VOIDmode, scratch,
37571 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
37573 /* e1 = fabs(op1) */
37574 emit_insn (gen_abs (e1, op1));
37576 /* e2 = e1 + 0.5 */
37577 half = force_reg (inmode, half);
37578 emit_insn (gen_rtx_SET (VOIDmode, e2,
37579 gen_rtx_PLUS (inmode, e1, half)));
37581 /* res = floor(e2) */
37582 if (inmode != XFmode)
37584 tmp1 = gen_reg_rtx (XFmode);
37586 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
37587 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
37597 rtx tmp0 = gen_reg_rtx (XFmode);
37599 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
37601 emit_insn (gen_rtx_SET (VOIDmode, res,
37602 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
37603 UNSPEC_TRUNC_NOOP)));
37607 emit_insn (gen_frndintxf2_floor (res, tmp1));
37610 emit_insn (gen_lfloorxfhi2 (res, tmp1));
37613 emit_insn (gen_lfloorxfsi2 (res, tmp1));
37616 emit_insn (gen_lfloorxfdi2 (res, tmp1));
37619 gcc_unreachable ();
37622 /* flags = signbit(a) */
37623 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
37625 /* if (flags) then res = -res */
37626 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
37627 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
37628 gen_rtx_LABEL_REF (VOIDmode, jump_label),
37630 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
37631 predict_jump (REG_BR_PROB_BASE * 50 / 100);
37632 JUMP_LABEL (insn) = jump_label;
37634 emit_insn (gen_neg (res, res));
37636 emit_label (jump_label);
37637 LABEL_NUSES (jump_label) = 1;
37639 emit_move_insn (op0, res);
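/* An informal C reference model of the sequence above (needs <math.h>):

     double round_ref (double a)
     {
       double r = floor (fabs (a) + 0.5);	/* e1 = fabs, e2 = +0.5 */
       return signbit (a) ? -r : r;		/* fxam bit 0x02 test */
     }
*/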
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
37645 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
37647 rtx x0, x1, e0, e1;
37649 x0 = gen_reg_rtx (mode);
37650 e0 = gen_reg_rtx (mode);
37651 e1 = gen_reg_rtx (mode);
37652 x1 = gen_reg_rtx (mode);
37654 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
37656 b = force_reg (mode, b);
37658 /* x0 = rcp(b) estimate */
37659 emit_insn (gen_rtx_SET (VOIDmode, x0,
37660 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
37663 emit_insn (gen_rtx_SET (VOIDmode, e0,
37664 gen_rtx_MULT (mode, x0, b)));
37667 emit_insn (gen_rtx_SET (VOIDmode, e0,
37668 gen_rtx_MULT (mode, x0, e0)));
37671 emit_insn (gen_rtx_SET (VOIDmode, e1,
37672 gen_rtx_PLUS (mode, x0, x0)));
37675 emit_insn (gen_rtx_SET (VOIDmode, x1,
37676 gen_rtx_MINUS (mode, e1, e0)));
37679 emit_insn (gen_rtx_SET (VOIDmode, res,
37680 gen_rtx_MULT (mode, a, x1)));
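/* Why the expansion above converges (informal): if x0 = rcp(b) carries
   relative error e, i.e. x0 = (1 - e)/b, then one Newton-Raphson step

       x1 = x0 + x0 - b*x0*x0 = x0 * (2 - b*x0) = (1 - e*e)/b

   squares the error, roughly doubling the correct bits of the ~12-bit
   rcpss estimate; the final multiply by a yields a/b.  */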
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root. */
37686 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
37689 rtx x0, e0, e1, e2, e3, mthree, mhalf;
37692 x0 = gen_reg_rtx (mode);
37693 e0 = gen_reg_rtx (mode);
37694 e1 = gen_reg_rtx (mode);
37695 e2 = gen_reg_rtx (mode);
37696 e3 = gen_reg_rtx (mode);
37698 real_from_integer (&r, VOIDmode, -3, -1, 0);
37699 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
37701 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
37702 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
37704 if (VECTOR_MODE_P (mode))
37706 mthree = ix86_build_const_vector (mode, true, mthree);
37707 mhalf = ix86_build_const_vector (mode, true, mhalf);
37710 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
37711 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
37713 a = force_reg (mode, a);
37715 /* x0 = rsqrt(a) estimate */
37716 emit_insn (gen_rtx_SET (VOIDmode, x0,
37717 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
  /* If a == 0.0, rsqrt returns infinity; filter it out to prevent
     NaN for sqrt (0.0). */
37725 zero = gen_reg_rtx (mode);
37726 mask = gen_reg_rtx (mode);
37728 zero = force_reg (mode, CONST0_RTX(mode));
37729 emit_insn (gen_rtx_SET (VOIDmode, mask,
37730 gen_rtx_NE (mode, zero, a)));
37732 emit_insn (gen_rtx_SET (VOIDmode, x0,
37733 gen_rtx_AND (mode, x0, mask)));
37737 emit_insn (gen_rtx_SET (VOIDmode, e0,
37738 gen_rtx_MULT (mode, x0, a)));
37740 emit_insn (gen_rtx_SET (VOIDmode, e1,
37741 gen_rtx_MULT (mode, e0, x0)));
37744 mthree = force_reg (mode, mthree);
37745 emit_insn (gen_rtx_SET (VOIDmode, e2,
37746 gen_rtx_PLUS (mode, e1, mthree)));
37748 mhalf = force_reg (mode, mhalf);
37750 /* e3 = -.5 * x0 */
37751 emit_insn (gen_rtx_SET (VOIDmode, e3,
37752 gen_rtx_MULT (mode, x0, mhalf)));
37754 /* e3 = -.5 * e0 */
37755 emit_insn (gen_rtx_SET (VOIDmode, e3,
37756 gen_rtx_MULT (mode, e0, mhalf)));
37757 /* ret = e2 * e3 */
37758 emit_insn (gen_rtx_SET (VOIDmode, res,
37759 gen_rtx_MULT (mode, e2, e3)));
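/* Derivation sketch (informal): the Newton-Raphson step for
   f(x) = 1/x^2 - a is

       x1 = x0 * (3 - a*x0*x0) / 2 = -0.5 * x0 * (a*x0*x0 - 3.0),

   which is exactly the e0..e3 chain above.  For sqrt the trailing x0
   factor is replaced by e0 = a*x0, using sqrt(a) = a * rsqrt(a).  */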
37762 #ifdef TARGET_SOLARIS
37763 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
37766 i386_solaris_elf_named_section (const char *name, unsigned int flags,
37769 /* With Binutils 2.15, the "@unwind" marker must be specified on
37770 every occurrence of the ".eh_frame" section, not just the first
37773 && strcmp (name, ".eh_frame") == 0)
37775 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
37776 flags & SECTION_WRITE ? "aw" : "a");
37781 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
37783 solaris_elf_asm_comdat_section (name, flags, decl);
37788 default_elf_asm_named_section (name, flags, decl);
37790 #endif /* TARGET_SOLARIS */
37792 /* Return the mangling of TYPE if it is an extended fundamental type. */
37794 static const char *
37795 ix86_mangle_type (const_tree type)
37797 type = TYPE_MAIN_VARIANT (type);
37799 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
37800 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
37803 switch (TYPE_MODE (type))
37806 /* __float128 is "g". */
37809 /* "long double" or __float80 is "e". */
37816 /* For 32-bit code we can save PIC register setup by using
37817 __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
37819 register, so it is better to call __stack_chk_fail directly. */
37821 static tree ATTRIBUTE_UNUSED
37822 ix86_stack_protect_fail (void)
37824 return TARGET_64BIT
37825 ? default_external_stack_protect_fail ()
37826 : default_hidden_stack_protect_fail ();
37829 /* Select a format to encode pointers in exception handling data. CODE
37830 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
37831 true if the symbol may be affected by dynamic relocations.
37833 ??? All x86 object file formats are capable of representing this.
37834 After all, the relocation needed is the same as for the call insn.
37835 Whether or not a particular assembler allows us to enter such, I
37836 guess we'll have to see. */
37838 asm_preferred_eh_data_format (int code, int global)
37842 int type = DW_EH_PE_sdata8;
37844 || ix86_cmodel == CM_SMALL_PIC
37845 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
37846 type = DW_EH_PE_sdata4;
37847 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
37849 if (ix86_cmodel == CM_SMALL
37850 || (ix86_cmodel == CM_MEDIUM && code))
37851 return DW_EH_PE_udata4;
37852 return DW_EH_PE_absptr;
37855 /* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit. */
37859 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
37861 enum machine_mode mode = GET_MODE (sign);
37862 rtx sgn = gen_reg_rtx (mode);
37863 if (mask == NULL_RTX)
37865 enum machine_mode vmode;
37867 if (mode == SFmode)
37869 else if (mode == DFmode)
37874 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
37875 if (!VECTOR_MODE_P (mode))
37877 /* We need to generate a scalar mode mask in this case. */
37878 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
37879 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
37880 mask = gen_reg_rtx (mode);
37881 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
37885 mask = gen_rtx_NOT (mode, mask);
37886 emit_insn (gen_rtx_SET (VOIDmode, sgn,
37887 gen_rtx_AND (mode, mask, sign)));
37888 emit_insn (gen_rtx_SET (VOIDmode, result,
37889 gen_rtx_IOR (mode, abs_value, sgn)));
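/* Bit-level example (informal, SFmode): with mask = 0x80000000,

     sgn    = sign      & 0x80000000;
     result = abs_value | sgn;	/* == copysign (abs_value, sign) */

   assuming abs_value already has a clear sign bit.  */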
37892 /* Expand fabs (OP0) and return a new rtx that holds the result. The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null. */
37896 ix86_expand_sse_fabs (rtx op0, rtx *smask)
37898 enum machine_mode vmode, mode = GET_MODE (op0);
37901 xa = gen_reg_rtx (mode);
37902 if (mode == SFmode)
37904 else if (mode == DFmode)
37908 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
37909 if (!VECTOR_MODE_P (mode))
37911 /* We need to generate a scalar mode mask in this case. */
37912 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
37913 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
37914 mask = gen_reg_rtx (mode);
37915 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
37917 emit_insn (gen_rtx_SET (VOIDmode, xa,
37918 gen_rtx_AND (mode, op0, mask)));
37926 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
37927 swapping the operands if SWAP_OPERANDS is true. The expanded
37928 code is a forward jump to a newly created label in case the
37929 comparison is true. The generated label rtx is returned. */
37931 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
37932 bool swap_operands)
37943 label = gen_label_rtx ();
37944 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
37945 emit_insn (gen_rtx_SET (VOIDmode, tmp,
37946 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
37947 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
37948 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
37949 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
37950 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
37951 JUMP_LABEL (tmp) = label;
37956 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
37957 using comparison code CODE. Operands are swapped for the comparison if
37958 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
37960 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
37961 bool swap_operands)
37963 rtx (*insn)(rtx, rtx, rtx, rtx);
37964 enum machine_mode mode = GET_MODE (op0);
37965 rtx mask = gen_reg_rtx (mode);
37974 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
37976 emit_insn (insn (mask, op0, op1,
37977 gen_rtx_fmt_ee (code, mode, op0, op1)));
37981 /* Generate and return a rtx of mode MODE for 2**n where n is the number
37982 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
37984 ix86_gen_TWO52 (enum machine_mode mode)
37986 REAL_VALUE_TYPE TWO52r;
37989 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
37990 TWO52 = const_double_from_real_value (TWO52r, mode);
37991 TWO52 = force_reg (mode, TWO52);
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0. */
37999 ix86_expand_lround (rtx op0, rtx op1)
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long) tmp;
   */
38005 enum machine_mode mode = GET_MODE (op1);
38006 const struct real_format *fmt;
38007 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
38010 /* load nextafter (0.5, 0.0) */
38011 fmt = REAL_MODE_FORMAT (mode);
38012 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
38013 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
38015 /* adj = copysign (0.5, op1) */
38016 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
38017 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
38019 /* adj = op1 + adj */
38020 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
38022 /* op0 = (imode)adj */
38023 expand_fix (op0, adj, 0);
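/* Why nextafter (0.5, 0.0) rather than 0.5 (informal): for the largest
   double below 0.5, x + 0.5 rounds up to 1.0 and lround would return 1
   instead of 0.  With adj equal to the predecessor of 0.5, x + adj
   stays below 1.0 for every x < 0.5, so the final fix conversion
   truncates to the correctly rounded result.  */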
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
   into OPERAND0. */
38029 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long) op1;
	xi -= (double) xi > op1 ? 1 : 0;
	op0 = xi;
   */
38036 enum machine_mode fmode = GET_MODE (op1);
38037 enum machine_mode imode = GET_MODE (op0);
38038 rtx ireg, freg, label, tmp;
38040 /* reg = (long)op1 */
38041 ireg = gen_reg_rtx (imode);
38042 expand_fix (ireg, op1, 0);
38044 /* freg = (double)reg */
38045 freg = gen_reg_rtx (fmode);
38046 expand_float (freg, ireg, 0);
38048 /* ireg = (freg > op1) ? ireg - 1 : ireg */
38049 label = ix86_expand_sse_compare_and_jump (UNLE,
38050 freg, op1, !do_floor);
38051 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
38052 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
38053 emit_move_insn (ireg, tmp);
38055 emit_label (label);
38056 LABEL_NUSES (label) = 1;
38058 emit_move_insn (op0, ireg);
38061 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
38062 result in OPERAND0. */
38064 ix86_expand_rint (rtx operand0, rtx operand1)
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
38073 enum machine_mode mode = GET_MODE (operand0);
38074 rtx res, xa, label, TWO52, mask;
38076 res = gen_reg_rtx (mode);
38077 emit_move_insn (res, operand1);
38079 /* xa = abs (operand1) */
38080 xa = ix86_expand_sse_fabs (res, &mask);
38082 /* if (!isless (xa, TWO52)) goto label; */
38083 TWO52 = ix86_gen_TWO52 (mode);
38084 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38086 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
38087 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
38089 ix86_sse_copysign_to_positive (res, xa, res, mask);
38091 emit_label (label);
38092 LABEL_NUSES (label) = 1;
38094 emit_move_insn (operand0, res);
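/* The TWO52 trick (informal): for 0 <= xa < 2**52, xa + 2**52 lies in
   [2**52, 2**53), where consecutive doubles are exactly 1.0 apart, so
   the addition rounds xa to an integer in the current rounding mode
   and the subtraction restores its magnitude.  Inputs >= 2**52 are
   already integral, hence the isless guard jumps past the whole
   computation.  */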
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0. */
38100 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	return x2;
   */
38116 enum machine_mode mode = GET_MODE (operand0);
38117 rtx xa, TWO52, tmp, label, one, res, mask;
38119 TWO52 = ix86_gen_TWO52 (mode);
38121 /* Temporary for holding the result, initialized to the input
38122 operand to ease control flow. */
38123 res = gen_reg_rtx (mode);
38124 emit_move_insn (res, operand1);
38126 /* xa = abs (operand1) */
38127 xa = ix86_expand_sse_fabs (res, &mask);
38129 /* if (!isless (xa, TWO52)) goto label; */
38130 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38132 /* xa = xa + TWO52 - TWO52; */
38133 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
38134 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
38136 /* xa = copysign (xa, operand1) */
38137 ix86_sse_copysign_to_positive (xa, xa, res, mask);
38139 /* generate 1.0 or -1.0 */
38140 one = force_reg (mode,
38141 const_double_from_real_value (do_floor
38142 ? dconst1 : dconstm1, mode));
38144 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
38145 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
38146 emit_insn (gen_rtx_SET (VOIDmode, tmp,
38147 gen_rtx_AND (mode, one, tmp)));
38148 /* We always need to subtract here to preserve signed zero. */
38149 tmp = expand_simple_binop (mode, MINUS,
38150 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
38151 emit_move_insn (res, tmp);
38153 emit_label (label);
38154 LABEL_NUSES (label) = 1;
38156 emit_move_insn (operand0, res);
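/* Worked example (informal, do_floor): x = 2.7 gives xa = 3.0 after the
   TWO52 round-to-nearest, and 3.0 UNGT 2.7 makes the mask all-ones, so
   1.0 is subtracted: floor (2.7) == 2.0.  For x = 2.3, xa rounds to
   2.0, the mask is zero and zero is subtracted.  */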
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0. */
38162 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
38179 enum machine_mode mode = GET_MODE (operand0);
38180 rtx xa, xi, TWO52, tmp, label, one, res, mask;
38182 TWO52 = ix86_gen_TWO52 (mode);
38184 /* Temporary for holding the result, initialized to the input
38185 operand to ease control flow. */
38186 res = gen_reg_rtx (mode);
38187 emit_move_insn (res, operand1);
38189 /* xa = abs (operand1) */
38190 xa = ix86_expand_sse_fabs (res, &mask);
38192 /* if (!isless (xa, TWO52)) goto label; */
38193 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38195 /* xa = (double)(long)x */
38196 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
38197 expand_fix (xi, res, 0);
38198 expand_float (xa, xi, 0);
38201 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
38203 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
38204 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
38205 emit_insn (gen_rtx_SET (VOIDmode, tmp,
38206 gen_rtx_AND (mode, one, tmp)));
38207 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
38208 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
38209 emit_move_insn (res, tmp);
38211 if (HONOR_SIGNED_ZEROS (mode))
38212 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
38214 emit_label (label);
38215 LABEL_NUSES (label) = 1;
38217 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  The sequence works without relying on DImode truncation
   via cvttsd2siq, which is only available on 64-bit targets. */
38224 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
38242 enum machine_mode mode = GET_MODE (operand0);
38243 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
38245 TWO52 = ix86_gen_TWO52 (mode);
38247 /* Temporary for holding the result, initialized to the input
38248 operand to ease control flow. */
38249 res = gen_reg_rtx (mode);
38250 emit_move_insn (res, operand1);
38252 /* xa = abs (operand1) */
38253 xa = ix86_expand_sse_fabs (res, &mask);
38255 /* if (!isless (xa, TWO52)) goto label; */
38256 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38258 /* xa2 = xa + TWO52 - TWO52; */
38259 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
38260 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
38262 /* dxa = xa2 - xa; */
38263 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
38265 /* generate 0.5, 1.0 and -0.5 */
38266 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
38267 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
38268 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
38272 tmp = gen_reg_rtx (mode);
38273 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
38274 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
38275 emit_insn (gen_rtx_SET (VOIDmode, tmp,
38276 gen_rtx_AND (mode, one, tmp)));
38277 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
38278 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
38279 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
38280 emit_insn (gen_rtx_SET (VOIDmode, tmp,
38281 gen_rtx_AND (mode, one, tmp)));
38282 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
38284 /* res = copysign (xa2, operand1) */
38285 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
38287 emit_label (label);
38288 LABEL_NUSES (label) = 1;
38290 emit_move_insn (operand0, res);
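/* Worked example (informal): x = 2.5 rounds to xa2 = 2.0 under the
   default round-to-nearest-even, so dxa = -0.5; the dxa <= -0.5 mask
   fires and 1.0 is added, giving round (2.5) == 3.0 (halfway cases go
   away from zero) once the sign is copied back.  */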
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0. */
38296 ix86_expand_trunc (rtx operand0, rtx operand1)
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
38307 enum machine_mode mode = GET_MODE (operand0);
38308 rtx xa, xi, TWO52, label, res, mask;
38310 TWO52 = ix86_gen_TWO52 (mode);
38312 /* Temporary for holding the result, initialized to the input
38313 operand to ease control flow. */
38314 res = gen_reg_rtx (mode);
38315 emit_move_insn (res, operand1);
38317 /* xa = abs (operand1) */
38318 xa = ix86_expand_sse_fabs (res, &mask);
38320 /* if (!isless (xa, TWO52)) goto label; */
38321 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38323 /* x = (double)(long)x */
38324 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
38325 expand_fix (xi, res, 0);
38326 expand_float (res, xi, 0);
38328 if (HONOR_SIGNED_ZEROS (mode))
38329 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
38331 emit_label (label);
38332 LABEL_NUSES (label) = 1;
38334 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0. */
38340 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
38342 enum machine_mode mode = GET_MODE (operand0);
38343 rtx xa, mask, TWO52, label, one, res, smask, tmp;
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */
38357 TWO52 = ix86_gen_TWO52 (mode);
38359 /* Temporary for holding the result, initialized to the input
38360 operand to ease control flow. */
38361 res = gen_reg_rtx (mode);
38362 emit_move_insn (res, operand1);
38364 /* xa = abs (operand1) */
38365 xa = ix86_expand_sse_fabs (res, &smask);
38367 /* if (!isless (xa, TWO52)) goto label; */
38368 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38370 /* res = xa + TWO52 - TWO52; */
38371 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
38372 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
38373 emit_move_insn (res, tmp);
38376 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
38378 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
38379 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
38380 emit_insn (gen_rtx_SET (VOIDmode, mask,
38381 gen_rtx_AND (mode, mask, one)));
38382 tmp = expand_simple_binop (mode, MINUS,
38383 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
38384 emit_move_insn (res, tmp);
38386 /* res = copysign (res, operand1) */
38387 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
38389 emit_label (label);
38390 LABEL_NUSES (label) = 1;
38392 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0. */
38398 ix86_expand_round (rtx operand0, rtx operand1)
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
38407 enum machine_mode mode = GET_MODE (operand0);
38408 rtx res, TWO52, xa, label, xi, half, mask;
38409 const struct real_format *fmt;
38410 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
38412 /* Temporary for holding the result, initialized to the input
38413 operand to ease control flow. */
38414 res = gen_reg_rtx (mode);
38415 emit_move_insn (res, operand1);
38417 TWO52 = ix86_gen_TWO52 (mode);
38418 xa = ix86_expand_sse_fabs (res, &mask);
38419 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38421 /* load nextafter (0.5, 0.0) */
38422 fmt = REAL_MODE_FORMAT (mode);
38423 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
38424 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
38426 /* xa = xa + 0.5 */
38427 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
38428 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
38430 /* xa = (double)(int64_t)xa */
38431 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
38432 expand_fix (xi, xa, 0);
38433 expand_float (xa, xi, 0);
38435 /* res = copysign (xa, operand1) */
38436 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
38438 emit_label (label);
38439 LABEL_NUSES (label) = 1;
38441 emit_move_insn (operand0, res);
38444 /* Expand SSE sequence for computing round
38445 from OP1 storing into OP0 using sse4 round insn. */
38447 ix86_expand_round_sse4 (rtx op0, rtx op1)
38449 enum machine_mode mode = GET_MODE (op0);
38450 rtx e1, e2, res, half;
38451 const struct real_format *fmt;
38452 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
38453 rtx (*gen_copysign) (rtx, rtx, rtx);
38454 rtx (*gen_round) (rtx, rtx, rtx);
38459 gen_copysign = gen_copysignsf3;
38460 gen_round = gen_sse4_1_roundsf2;
38463 gen_copysign = gen_copysigndf3;
38464 gen_round = gen_sse4_1_rounddf2;
38467 gcc_unreachable ();
38470 /* round (a) = trunc (a + copysign (0.5, a)) */
38472 /* load nextafter (0.5, 0.0) */
38473 fmt = REAL_MODE_FORMAT (mode);
38474 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
38475 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
38476 half = const_double_from_real_value (pred_half, mode);
38478 /* e1 = copysign (0.5, op1) */
38479 e1 = gen_reg_rtx (mode);
38480 emit_insn (gen_copysign (e1, half, op1));
38482 /* e2 = op1 + e1 */
38483 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
38485 /* res = trunc (e2) */
38486 res = gen_reg_rtx (mode);
38487 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
38489 emit_move_insn (op0, res);
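/* One-line model of the SSE4.1 path (informal):
   round (a) == trunc (a + copysign (nextafter (0.5, 0.0), a)).
   E.g. a = 2.3: 2.3 + 0.49999999999999994 gives about 2.8, truncated
   to 2.0; for a = 2.5 the addition itself rounds up to exactly 3.0,
   which truncates to the desired 3.0.  */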
38493 /* Table of valid machine attributes. */
38494 static const struct attribute_spec ix86_attribute_table[] =
38496 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
38497 affects_type_identity } */
38498 /* Stdcall attribute says callee is responsible for popping arguments
38499 if they are not variable. */
38500 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
38502 /* Fastcall attribute says callee is responsible for popping arguments
38503 if they are not variable. */
38504 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
38506 /* Thiscall attribute says callee is responsible for popping arguments
38507 if they are not variable. */
38508 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
38510 /* Cdecl attribute says the callee is a normal C declaration */
38511 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
38513 /* Regparm attribute specifies how many integer arguments are to be
38514 passed in registers. */
38515 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
38517 /* Sseregparm attribute says we are using x86_64 calling conventions
38518 for FP arguments. */
38519 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
38521 /* The transactional memory builtins are implicitly regparm or fastcall
38522 depending on the ABI. Override the generic do-nothing attribute that
38523 these builtins were declared with. */
38524 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
38526 /* force_align_arg_pointer says this function realigns the stack at entry. */
38527 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
38528 false, true, true, ix86_handle_cconv_attribute, false },
38529 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38530 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
38531 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
38532 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
38535 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
38537 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
38539 #ifdef SUBTARGET_ATTRIBUTE_TABLE
38540 SUBTARGET_ATTRIBUTE_TABLE,
38542 /* ms_abi and sysv_abi calling convention function attributes. */
38543 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
38544 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
38545 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
38547 { "callee_pop_aggregate_return", 1, 1, false, true, true,
38548 ix86_handle_callee_pop_aggregate_return, true },
38550 { NULL, 0, 0, false, false, false, NULL, false }
38553 /* Implement targetm.vectorize.builtin_vectorization_cost. */
38555 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
38557 int misalign ATTRIBUTE_UNUSED)
38561 switch (type_of_cost)
38564 return ix86_cost->scalar_stmt_cost;
38567 return ix86_cost->scalar_load_cost;
38570 return ix86_cost->scalar_store_cost;
38573 return ix86_cost->vec_stmt_cost;
38576 return ix86_cost->vec_align_load_cost;
38579 return ix86_cost->vec_store_cost;
38581 case vec_to_scalar:
38582 return ix86_cost->vec_to_scalar_cost;
38584 case scalar_to_vec:
38585 return ix86_cost->scalar_to_vec_cost;
38587 case unaligned_load:
38588 case unaligned_store:
38589 return ix86_cost->vec_unalign_load_cost;
38591 case cond_branch_taken:
38592 return ix86_cost->cond_taken_branch_cost;
38594 case cond_branch_not_taken:
38595 return ix86_cost->cond_not_taken_branch_cost;
38598 case vec_promote_demote:
38599 return ix86_cost->vec_stmt_cost;
38601 case vec_construct:
38602 elements = TYPE_VECTOR_SUBPARTS (vectype);
38603 return elements / 2 + 1;
38606 gcc_unreachable ();
38610 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
38611 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
38612 insn every time. */
38614 static GTY(()) rtx vselect_insn;
38616 /* Initialize vselect_insn. */
38619 init_vselect_insn (void)
38624 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
38625 for (i = 0; i < MAX_VECT_LEN; ++i)
38626 XVECEXP (x, 0, i) = const0_rtx;
38627 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
38629 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
38631 vselect_insn = emit_insn (x);
38635 /* Construct (set target (vec_select op0 (parallel perm))) and
38636 return true if that's a valid instruction in the active ISA. */
38639 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
38640 unsigned nelt, bool testing_p)
38643 rtx x, save_vconcat;
38646 if (vselect_insn == NULL_RTX)
38647 init_vselect_insn ();
38649 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
38650 PUT_NUM_ELEM (XVEC (x, 0), nelt);
38651 for (i = 0; i < nelt; ++i)
38652 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
38653 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
38654 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
38655 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
38656 SET_DEST (PATTERN (vselect_insn)) = target;
38657 icode = recog_memoized (vselect_insn);
38659 if (icode >= 0 && !testing_p)
38660 emit_insn (copy_rtx (PATTERN (vselect_insn)));
38662 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
38663 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
38664 INSN_CODE (vselect_insn) = -1;
38669 /* Similar, but generate a vec_concat from op0 and op1 as well. */
38672 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
38673 const unsigned char *perm, unsigned nelt,
38676 enum machine_mode v2mode;
38680 if (vselect_insn == NULL_RTX)
38681 init_vselect_insn ();
38683 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
38684 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
38685 PUT_MODE (x, v2mode);
38688 ok = expand_vselect (target, x, perm, nelt, testing_p);
38689 XEXP (x, 0) = const0_rtx;
38690 XEXP (x, 1) = const0_rtx;
38694 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38695 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
38698 expand_vec_perm_blend (struct expand_vec_perm_d *d)
38700 enum machine_mode vmode = d->vmode;
38701 unsigned i, mask, nelt = d->nelt;
38702 rtx target, op0, op1, x;
38703 rtx rperm[32], vperm;
38705 if (d->one_operand_p)
38707 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
38709 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
38711 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
38716 /* This is a blend, not a permute. Elements must stay in their
38717 respective lanes. */
38718 for (i = 0; i < nelt; ++i)
38720 unsigned e = d->perm[i];
38721 if (!(e == i || e == i + nelt))
38728 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
38729 decision should be extracted elsewhere, so that we only try that
38730 sequence once all budget==3 options have been tried. */
38731 target = d->target;
38744 for (i = 0; i < nelt; ++i)
38745 mask |= (d->perm[i] >= nelt) << i;
38749 for (i = 0; i < 2; ++i)
38750 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
38755 for (i = 0; i < 4; ++i)
38756 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
	  /* See if bytes move in pairs so we can use pblendw with
	     an immediate argument, rather than pblendvb with a vector
	     argument. */
38764 for (i = 0; i < 16; i += 2)
38765 if (d->perm[i] + 1 != d->perm[i + 1])
38768 for (i = 0; i < nelt; ++i)
38769 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
38772 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
38773 vperm = force_reg (vmode, vperm);
38775 if (GET_MODE_SIZE (vmode) == 16)
38776 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
38778 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
38782 for (i = 0; i < 8; ++i)
38783 mask |= (d->perm[i * 2] >= 16) << i;
38788 target = gen_lowpart (vmode, target);
38789 op0 = gen_lowpart (vmode, op0);
38790 op1 = gen_lowpart (vmode, op1);
38794 /* See if bytes move in pairs. If not, vpblendvb must be used. */
38795 for (i = 0; i < 32; i += 2)
38796 if (d->perm[i] + 1 != d->perm[i + 1])
38798 /* See if bytes move in quadruplets. If yes, vpblendd
38799 with immediate can be used. */
38800 for (i = 0; i < 32; i += 4)
38801 if (d->perm[i] + 2 != d->perm[i + 2])
38805 /* See if bytes move the same in both lanes. If yes,
38806 vpblendw with immediate can be used. */
38807 for (i = 0; i < 16; i += 2)
38808 if (d->perm[i] + 16 != d->perm[i + 16])
38811 /* Use vpblendw. */
38812 for (i = 0; i < 16; ++i)
38813 mask |= (d->perm[i * 2] >= 32) << i;
38818 /* Use vpblendd. */
38819 for (i = 0; i < 8; ++i)
38820 mask |= (d->perm[i * 4] >= 32) << i;
38825 /* See if words move in pairs. If yes, vpblendd can be used. */
38826 for (i = 0; i < 16; i += 2)
38827 if (d->perm[i] + 1 != d->perm[i + 1])
38831 /* See if words move the same in both lanes. If not,
38832 vpblendvb must be used. */
38833 for (i = 0; i < 8; i++)
38834 if (d->perm[i] + 8 != d->perm[i + 8])
38836 /* Use vpblendvb. */
38837 for (i = 0; i < 32; ++i)
38838 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
38842 target = gen_lowpart (vmode, target);
38843 op0 = gen_lowpart (vmode, op0);
38844 op1 = gen_lowpart (vmode, op1);
38845 goto finish_pblendvb;
38848 /* Use vpblendw. */
38849 for (i = 0; i < 16; ++i)
38850 mask |= (d->perm[i] >= 16) << i;
38854 /* Use vpblendd. */
38855 for (i = 0; i < 8; ++i)
38856 mask |= (d->perm[i * 2] >= 16) << i;
38861 /* Use vpblendd. */
38862 for (i = 0; i < 4; ++i)
38863 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
38868 gcc_unreachable ();
38871 /* This matches five different patterns with the different modes. */
38872 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
38873 x = gen_rtx_SET (VOIDmode, target, x);
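/* Immediate construction example (informal): for V4SF and the
   permutation {0, 5, 6, 3}, elements 1 and 2 come from op1, so the
   loop above builds mask = 0b0110 and the insn becomes a blendps with
   immediate 6.  */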
38879 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38880 in terms of the variable form of vpermilps.
38882 Note that we will have already failed the immediate input vpermilps,
38883 which requires that the high and low part shuffle be identical; the
38884 variable form doesn't require that. */
38887 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
38889 rtx rperm[8], vperm;
38892 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
38895 /* We can only permute within the 128-bit lane. */
38896 for (i = 0; i < 8; ++i)
38898 unsigned e = d->perm[i];
38899 if (i < 4 ? e >= 4 : e < 4)
38906 for (i = 0; i < 8; ++i)
38908 unsigned e = d->perm[i];
38910 /* Within each 128-bit lane, the elements of op0 are numbered
38911 from 0 and the elements of op1 are numbered from 4. */
38917 rperm[i] = GEN_INT (e);
38920 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
38921 vperm = force_reg (V8SImode, vperm);
38922 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
38927 /* Return true if permutation D can be performed as VMODE permutation
38931 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
38933 unsigned int i, j, chunk;
38935 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
38936 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
38937 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
38940 if (GET_MODE_NUNITS (vmode) >= d->nelt)
38943 chunk = d->nelt / GET_MODE_NUNITS (vmode);
38944 for (i = 0; i < d->nelt; i += chunk)
38945 if (d->perm[i] & (chunk - 1))
38948 for (j = 1; j < chunk; ++j)
38949 if (d->perm[i] + j != d->perm[i + j])
38955 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38956 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
38959 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
38961 unsigned i, nelt, eltsz, mask;
38962 unsigned char perm[32];
38963 enum machine_mode vmode = V16QImode;
38964 rtx rperm[32], vperm, target, op0, op1;
38968 if (!d->one_operand_p)
38970 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
38973 && valid_perm_using_mode_p (V2TImode, d))
38978 /* Use vperm2i128 insn. The pattern uses
38979 V4DImode instead of V2TImode. */
38980 target = gen_lowpart (V4DImode, d->target);
38981 op0 = gen_lowpart (V4DImode, d->op0);
38982 op1 = gen_lowpart (V4DImode, d->op1);
38984 = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
38985 || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
38986 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
38994 if (GET_MODE_SIZE (d->vmode) == 16)
38999 else if (GET_MODE_SIZE (d->vmode) == 32)
39004 /* V4DImode should be already handled through
39005 expand_vselect by vpermq instruction. */
39006 gcc_assert (d->vmode != V4DImode);
39009 if (d->vmode == V8SImode
39010 || d->vmode == V16HImode
39011 || d->vmode == V32QImode)
39013 /* First see if vpermq can be used for
39014 V8SImode/V16HImode/V32QImode. */
39015 if (valid_perm_using_mode_p (V4DImode, d))
39017 for (i = 0; i < 4; i++)
39018 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
39021 return expand_vselect (gen_lowpart (V4DImode, d->target),
39022 gen_lowpart (V4DImode, d->op0),
39026 /* Next see if vpermd can be used. */
39027 if (valid_perm_using_mode_p (V8SImode, d))
39030 /* Or if vpermps can be used. */
39031 else if (d->vmode == V8SFmode)
39034 if (vmode == V32QImode)
	  /* vpshufb only works intra lanes; it is not
	     possible to shuffle bytes in between the lanes. */
39038 for (i = 0; i < nelt; ++i)
39039 if ((d->perm[i] ^ i) & (nelt / 2))
39050 if (vmode == V8SImode)
39051 for (i = 0; i < 8; ++i)
39052 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
39055 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
39056 if (!d->one_operand_p)
39057 mask = 2 * nelt - 1;
39058 else if (vmode == V16QImode)
39061 mask = nelt / 2 - 1;
39063 for (i = 0; i < nelt; ++i)
39065 unsigned j, e = d->perm[i] & mask;
39066 for (j = 0; j < eltsz; ++j)
39067 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
39071 vperm = gen_rtx_CONST_VECTOR (vmode,
39072 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
39073 vperm = force_reg (vmode, vperm);
39075 target = gen_lowpart (vmode, d->target);
39076 op0 = gen_lowpart (vmode, d->op0);
39077 if (d->one_operand_p)
39079 if (vmode == V16QImode)
39080 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
39081 else if (vmode == V32QImode)
39082 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
39083 else if (vmode == V8SFmode)
39084 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
39086 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
39090 op1 = gen_lowpart (vmode, d->op1);
39091 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
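/* Control vector example (informal): shuffling V4SI as {2, 3, 0, 1}
   via pshufb has eltsz == 4, so the e * eltsz + j loop above expands
   each element index into its four byte indexes:
   {8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7}.  */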
39097 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
39098 in a single instruction. */
39101 expand_vec_perm_1 (struct expand_vec_perm_d *d)
39103 unsigned i, nelt = d->nelt;
39104 unsigned char perm2[MAX_VECT_LEN];
39106 /* Check plain VEC_SELECT first, because AVX has instructions that could
39107 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
39108 input where SEL+CONCAT may not. */
39109 if (d->one_operand_p)
39111 int mask = nelt - 1;
39112 bool identity_perm = true;
39113 bool broadcast_perm = true;
39115 for (i = 0; i < nelt; i++)
39117 perm2[i] = d->perm[i] & mask;
39119 identity_perm = false;
39121 broadcast_perm = false;
39127 emit_move_insn (d->target, d->op0);
39130 else if (broadcast_perm && TARGET_AVX2)
39132 /* Use vpbroadcast{b,w,d}. */
39133 rtx (*gen) (rtx, rtx) = NULL;
39137 gen = gen_avx2_pbroadcastv32qi_1;
39140 gen = gen_avx2_pbroadcastv16hi_1;
39143 gen = gen_avx2_pbroadcastv8si_1;
39146 gen = gen_avx2_pbroadcastv16qi;
39149 gen = gen_avx2_pbroadcastv8hi;
39152 gen = gen_avx2_vec_dupv8sf_1;
	  /* For other modes, prefer the other shuffles this function creates. */
39160 emit_insn (gen (d->target, d->op0));
39165 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
39168 /* There are plenty of patterns in sse.md that are written for
39169 SEL+CONCAT and are not replicated for a single op. Perhaps
39170 that should be changed, to avoid the nastiness here. */
39172 /* Recognize interleave style patterns, which means incrementing
39173 every other permutation operand. */
39174 for (i = 0; i < nelt; i += 2)
39176 perm2[i] = d->perm[i] & mask;
39177 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
39179 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
39183 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
39186 for (i = 0; i < nelt; i += 4)
39188 perm2[i + 0] = d->perm[i + 0] & mask;
39189 perm2[i + 1] = d->perm[i + 1] & mask;
39190 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
39191 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
39194 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
39200 /* Finally, try the fully general two operand permute. */
39201 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
39205 /* Recognize interleave style patterns with reversed operands. */
39206 if (!d->one_operand_p)
39208 for (i = 0; i < nelt; ++i)
39210 unsigned e = d->perm[i];
39218 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
39223 /* Try the SSE4.1 blend variable merge instructions. */
39224 if (expand_vec_perm_blend (d))
39227 /* Try one of the AVX vpermil variable permutations. */
39228 if (expand_vec_perm_vpermil (d))
39231 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
39232 vpshufb, vpermd, vpermps or vpermq variable permutation. */
39233 if (expand_vec_perm_pshufb (d))
39239 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39240 in terms of a pair of pshuflw + pshufhw instructions. */
39243 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
39245 unsigned char perm2[MAX_VECT_LEN];
39249 if (d->vmode != V8HImode || !d->one_operand_p)
39252 /* The two permutations only operate in 64-bit lanes. */
39253 for (i = 0; i < 4; ++i)
39254 if (d->perm[i] >= 4)
39256 for (i = 4; i < 8; ++i)
39257 if (d->perm[i] < 4)
39263 /* Emit the pshuflw. */
39264 memcpy (perm2, d->perm, 4);
39265 for (i = 4; i < 8; ++i)
39267 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
39270 /* Emit the pshufhw. */
39271 memcpy (perm2 + 4, d->perm + 4, 4);
39272 for (i = 0; i < 4; ++i)
39274 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
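/* Example (informal): the V8HI permutation {2,1,0,3, 5,4,7,6} never
   crosses the 64-bit halves, so it splits into a pshuflw for words
   0-3 followed by a pshufhw for words 4-7, each driven by an
   immediate control.  */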
39280 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39281 the permutation using the SSSE3 palignr instruction. This succeeds
39282 when all of the elements in PERM fit within one vector and we merely
39283 need to shift them down so that a single vector permutation has a
39284 chance to succeed. */
39287 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
39289 unsigned i, nelt = d->nelt;
39294 /* Even with AVX, palignr only operates on 128-bit vectors. */
39295 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
39298 min = nelt, max = 0;
39299 for (i = 0; i < nelt; ++i)
39301 unsigned e = d->perm[i];
39307 if (min == 0 || max - min >= nelt)
39310 /* Given that we have SSSE3, we know we'll be able to implement the
39311 single operand permutation after the palignr with pshufb. */
39315 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
39316 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
39317 gen_lowpart (TImode, d->op1),
39318 gen_lowpart (TImode, d->op0), shift));
39320 d->op0 = d->op1 = d->target;
39321 d->one_operand_p = true;
39324 for (i = 0; i < nelt; ++i)
39326 unsigned e = d->perm[i] - min;
39332 /* Test for the degenerate case where the alignment by itself
39333 produces the desired permutation. */
39337 ok = expand_vec_perm_1 (d);
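/* Example (informal): a two-operand V16QI permutation selecting bytes
   5..20 of the op1:op0 concatenation has min == 5, so palignr shifts
   the concatenated pair right by 5 bytes; the remaining one-operand
   permutation is then the identity -- the degenerate in-order case
   noted above.  */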
39343 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
39345 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39346 a two vector permutation into a single vector permutation by using
39347 an interleave operation to merge the vectors. */
39350 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
39352 struct expand_vec_perm_d dremap, dfinal;
39353 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
39354 unsigned HOST_WIDE_INT contents;
39355 unsigned char remap[2 * MAX_VECT_LEN];
39357 bool ok, same_halves = false;
39359 if (GET_MODE_SIZE (d->vmode) == 16)
39361 if (d->one_operand_p)
39364 else if (GET_MODE_SIZE (d->vmode) == 32)
39368 /* For 32-byte modes allow even d->one_operand_p.
39369 The lack of cross-lane shuffling in some instructions
39370 might prevent a single insn shuffle. */
39372 dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
	 a 3 insn sequence, give up and let it be expanded as
	 a 3 insn sequence.  While that is one insn longer,
	 it doesn't need a memory operand, and in the common
	 case where both the interleave-low and interleave-high
	 permutations with the same operands are adjacent, it
	 needs only 4 insns for both after CSE. */
39380 if (expand_vec_perm_interleave3 (&dfinal))
39386 /* Examine from whence the elements come. */
39388 for (i = 0; i < nelt; ++i)
39389 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
39391 memset (remap, 0xff, sizeof (remap));
39394 if (GET_MODE_SIZE (d->vmode) == 16)
39396 unsigned HOST_WIDE_INT h1, h2, h3, h4;
39398 /* Split the two input vectors into 4 halves. */
39399 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
  /* If all the elements come from the low halves, use interleave low;
     similarly for interleave high.  If the elements are from mismatched
     halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
39407 if ((contents & (h1 | h3)) == contents)
39410 for (i = 0; i < nelt2; ++i)
39413 remap[i + nelt] = i * 2 + 1;
39414 dremap.perm[i * 2] = i;
39415 dremap.perm[i * 2 + 1] = i + nelt;
39417 if (!TARGET_SSE2 && d->vmode == V4SImode)
39418 dremap.vmode = V4SFmode;
39420 else if ((contents & (h2 | h4)) == contents)
39423 for (i = 0; i < nelt2; ++i)
39425 remap[i + nelt2] = i * 2;
39426 remap[i + nelt + nelt2] = i * 2 + 1;
39427 dremap.perm[i * 2] = i + nelt2;
39428 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
39430 if (!TARGET_SSE2 && d->vmode == V4SImode)
39431 dremap.vmode = V4SFmode;
39433 else if ((contents & (h1 | h4)) == contents)
39436 for (i = 0; i < nelt2; ++i)
39439 remap[i + nelt + nelt2] = i + nelt2;
39440 dremap.perm[i] = i;
39441 dremap.perm[i + nelt2] = i + nelt + nelt2;
39446 dremap.vmode = V2DImode;
39448 dremap.perm[0] = 0;
39449 dremap.perm[1] = 3;
39452 else if ((contents & (h2 | h3)) == contents)
39455 for (i = 0; i < nelt2; ++i)
39457 remap[i + nelt2] = i;
39458 remap[i + nelt] = i + nelt2;
39459 dremap.perm[i] = i + nelt2;
39460 dremap.perm[i + nelt2] = i + nelt;
39465 dremap.vmode = V2DImode;
39467 dremap.perm[0] = 1;
39468 dremap.perm[1] = 2;
39476 unsigned int nelt4 = nelt / 4, nzcnt = 0;
39477 unsigned HOST_WIDE_INT q[8];
39478 unsigned int nonzero_halves[4];
39480 /* Split the two input vectors into 8 quarters. */
39481 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
39482 for (i = 1; i < 8; ++i)
39483 q[i] = q[0] << (nelt4 * i);
39484 for (i = 0; i < 4; ++i)
39485 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
39487 nonzero_halves[nzcnt] = i;
39493 gcc_assert (d->one_operand_p);
39494 nonzero_halves[1] = nonzero_halves[0];
39495 same_halves = true;
39497 else if (d->one_operand_p)
39499 gcc_assert (nonzero_halves[0] == 0);
39500 gcc_assert (nonzero_halves[1] == 1);
39505 if (d->perm[0] / nelt2 == nonzero_halves[1])
39507 /* Attempt to increase the likelihood that dfinal
39508 shuffle will be intra-lane. */
39509 char tmph = nonzero_halves[0];
39510 nonzero_halves[0] = nonzero_halves[1];
39511 nonzero_halves[1] = tmph;
39514 /* vperm2f128 or vperm2i128. */
39515 for (i = 0; i < nelt2; ++i)
39517 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
39518 remap[i + nonzero_halves[0] * nelt2] = i;
39519 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
39520 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
39523 if (d->vmode != V8SFmode
39524 && d->vmode != V4DFmode
39525 && d->vmode != V8SImode)
39527 dremap.vmode = V8SImode;
39529 for (i = 0; i < 4; ++i)
39531 dremap.perm[i] = i + nonzero_halves[0] * 4;
39532 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
39536 else if (d->one_operand_p)
39538 else if (TARGET_AVX2
39539 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
39542 for (i = 0; i < nelt4; ++i)
39545 remap[i + nelt] = i * 2 + 1;
39546 remap[i + nelt2] = i * 2 + nelt2;
39547 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
39548 dremap.perm[i * 2] = i;
39549 dremap.perm[i * 2 + 1] = i + nelt;
39550 dremap.perm[i * 2 + nelt2] = i + nelt2;
39551 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
39554 else if (TARGET_AVX2
39555 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
39558 for (i = 0; i < nelt4; ++i)
39560 remap[i + nelt4] = i * 2;
39561 remap[i + nelt + nelt4] = i * 2 + 1;
39562 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
39563 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
39564 dremap.perm[i * 2] = i + nelt4;
39565 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
39566 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
39567 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
39574 /* Use the remapping array set up above to move the elements from their
39575 swizzled locations into their final destinations. */
39577 for (i = 0; i < nelt; ++i)
39579 unsigned e = remap[d->perm[i]];
39580 gcc_assert (e < nelt);
39581 /* If same_halves is true, both halves of the remapped vector are the
39582 same. Avoid cross-lane accesses if possible. */
39583 if (same_halves && i >= nelt2)
39585 gcc_assert (e < nelt2);
39586 dfinal.perm[i] = e + nelt2;
39589 dfinal.perm[i] = e;
39591 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
39592 dfinal.op1 = dfinal.op0;
39593 dfinal.one_operand_p = true;
39594 dremap.target = dfinal.op0;
39596 /* Test if the final remap can be done with a single insn. For V4SFmode or
39597 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
39599 ok = expand_vec_perm_1 (&dfinal);
39600 seq = get_insns ();
39609 if (dremap.vmode != dfinal.vmode)
39611 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
39612 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
39613 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
39616 ok = expand_vec_perm_1 (&dremap);
39623 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39624 a single vector cross-lane permutation into vpermq followed
39625 by any of the single insn permutations. */
39628 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
39630 struct expand_vec_perm_d dremap, dfinal;
39631 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
39632 unsigned contents[2];
39636 && (d->vmode == V32QImode || d->vmode == V16HImode)
39637 && d->one_operand_p))
39642 for (i = 0; i < nelt2; ++i)
39644 contents[0] |= 1u << (d->perm[i] / nelt4);
39645 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
39648 for (i = 0; i < 2; ++i)
39650 unsigned int cnt = 0;
39651 for (j = 0; j < 4; ++j)
39652 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
39660 dremap.vmode = V4DImode;
39662 dremap.target = gen_reg_rtx (V4DImode);
39663 dremap.op0 = gen_lowpart (V4DImode, d->op0);
39664 dremap.op1 = dremap.op0;
39665 dremap.one_operand_p = true;
39666 for (i = 0; i < 2; ++i)
39668 unsigned int cnt = 0;
39669 for (j = 0; j < 4; ++j)
39670 if ((contents[i] & (1u << j)) != 0)
39671 dremap.perm[2 * i + cnt++] = j;
39672 for (; cnt < 2; ++cnt)
39673 dremap.perm[2 * i + cnt] = 0;
39677 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
39678 dfinal.op1 = dfinal.op0;
39679 dfinal.one_operand_p = true;
39680 for (i = 0, j = 0; i < nelt; ++i)
39684 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
39685 if ((d->perm[i] / nelt4) == dremap.perm[j])
39687 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
39688 dfinal.perm[i] |= nelt4;
39690 gcc_unreachable ();
39693 ok = expand_vec_perm_1 (&dremap);
39696 ok = expand_vec_perm_1 (&dfinal);
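/* Editorial illustration of the CONTENTS test above: for V32QImode,
   nelt4 == 8, so d->perm[i] / nelt4 names the 64-bit quarter an
   element comes from.  contents[0] and contents[1] collect the
   quarters feeding the low and high 128-bit halves of the result, and
   the cnt > 2 test flags any half needing more than two quarters,
   since a single vpermq can route at most two source quarters into
   each half.  */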
39702 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
39703 a vector permutation using two instructions, vperm2f128 resp.
39704 vperm2i128 followed by any single in-lane permutation. */
39707 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
39709 struct expand_vec_perm_d dfirst, dsecond;
39710 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
39714 || GET_MODE_SIZE (d->vmode) != 32
39715 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
39719 dsecond.one_operand_p = false;
39720 dsecond.testing_p = true;
39722 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
39723 immediate. For perm < 16 the second permutation uses
39724 d->op0 as first operand, for perm >= 16 it uses d->op1
39725 as first operand. The second operand is the result of vperm2[fi]128. */
39727 for (perm = 0; perm < 32; perm++)
39729 /* Ignore permutations which do not move anything cross-lane. */
39732 /* The second shuffle for e.g. V4DFmode has
39733 0123 and ABCD operands.
39734 Ignore AB23, as 23 is already in the second lane
39735 of the first operand. */
39736 if ((perm & 0xc) == (1 << 2)) continue;
39737 /* And 01CD, as 01 is in the first lane of the first operand. */
39739 if ((perm & 3) == 0) continue;
39740 /* And 4567, as then the vperm2[fi]128 doesn't change
39741 anything on the original 4567 second operand. */
39742 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
39746 /* The second shuffle for e.g. V4DFmode has
39747 4567 and ABCD operands.
39748 Ignore AB67, as 67 is already in the second lane
39749 of the first operand. */
39750 if ((perm & 0xc) == (3 << 2)) continue;
39751 /* And 45CD, as 45 is in the first lane of the first operand. */
39753 if ((perm & 3) == 2) continue;
39754 /* And 0123, as then the vperm2[fi]128 doesn't change
39755 anything on the original 0123 first operand. */
39756 if ((perm & 0xf) == (1 << 2)) continue;
39759 for (i = 0; i < nelt; i++)
39761 j = d->perm[i] / nelt2;
39762 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
39763 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
39764 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
39765 dsecond.perm[i] = d->perm[i] & (nelt - 1);
39773 ok = expand_vec_perm_1 (&dsecond);
39784 /* Found a usable second shuffle. dfirst will be
39785 vperm2f128 on d->op0 and d->op1. */
39786 dsecond.testing_p = false;
39788 dfirst.target = gen_reg_rtx (d->vmode);
39789 for (i = 0; i < nelt; i++)
39790 dfirst.perm[i] = (i & (nelt2 - 1))
39791 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
39793 ok = expand_vec_perm_1 (&dfirst);
39796 /* And dsecond is some single insn shuffle, taking
39797 d->op0 and result of vperm2f128 (if perm < 16) or
39798 d->op1 and result of vperm2f128 (otherwise). */
39799 dsecond.op1 = dfirst.target;
39801 dsecond.op0 = dfirst.op1;
39803 ok = expand_vec_perm_1 (&dsecond);
39809 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
39810 if (d->one_operand_p)
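/* Editorial example of the immediate encoding used in the loop above:
   bits [1:0] of the vperm2[fi]128 immediate pick the source lane for
   the low 128-bit half of the result and bits [5:4] pick it for the
   high half, numbering the four lanes of op0:op1 as 0..3.  The
   expression ((perm << 2) | perm) & 0x33 realizes exactly that; for
   perm == 6 (low half from lane 2, high half from lane 1) the
   immediate is 0x12.  */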
39817 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39818 a two vector permutation using 2 intra-lane interleave insns
39819 and cross-lane shuffle for 32-byte vectors. */
39822 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
39825 rtx (*gen) (rtx, rtx, rtx);
39827 if (d->one_operand_p)
39829 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
39831 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
39837 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
39839 for (i = 0; i < nelt; i += 2)
39840 if (d->perm[i] != d->perm[0] + i / 2
39841 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
39851 gen = gen_vec_interleave_highv32qi;
39853 gen = gen_vec_interleave_lowv32qi;
39857 gen = gen_vec_interleave_highv16hi;
39859 gen = gen_vec_interleave_lowv16hi;
39863 gen = gen_vec_interleave_highv8si;
39865 gen = gen_vec_interleave_lowv8si;
39869 gen = gen_vec_interleave_highv4di;
39871 gen = gen_vec_interleave_lowv4di;
39875 gen = gen_vec_interleave_highv8sf;
39877 gen = gen_vec_interleave_lowv8sf;
39881 gen = gen_vec_interleave_highv4df;
39883 gen = gen_vec_interleave_lowv4df;
39886 gcc_unreachable ();
39889 emit_insn (gen (d->target, d->op0, d->op1));
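/* Editorial example of the pattern matched above: with nelt == 8
   (V8SImode) and d->perm[0] == 0 the accepted permutation is
   { 0 8 1 9 2 10 3 11 }, an interleave of the low halves of the two
   operands; d->perm[0] == nelt / 2 selects the corresponding
   high-half interleave.  */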
39893 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
39894 a single vector permutation using a single intra-lane vector
39895 permutation, vperm2f128 swapping the lanes and vblend* insn blending
39896 the non-swapped and swapped vectors together. */
39899 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
39901 struct expand_vec_perm_d dfirst, dsecond;
39902 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
39905 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
39909 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
39910 || !d->one_operand_p)
39914 for (i = 0; i < nelt; i++)
39915 dfirst.perm[i] = 0xff;
39916 for (i = 0, msk = 0; i < nelt; i++)
39918 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
39919 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
39921 dfirst.perm[j] = d->perm[i];
39925 for (i = 0; i < nelt; i++)
39926 if (dfirst.perm[i] == 0xff)
39927 dfirst.perm[i] = i;
39930 dfirst.target = gen_reg_rtx (dfirst.vmode);
39933 ok = expand_vec_perm_1 (&dfirst);
39934 seq = get_insns ();
39946 dsecond.op0 = dfirst.target;
39947 dsecond.op1 = dfirst.target;
39948 dsecond.one_operand_p = true;
39949 dsecond.target = gen_reg_rtx (dsecond.vmode);
39950 for (i = 0; i < nelt; i++)
39951 dsecond.perm[i] = i ^ nelt2;
39953 ok = expand_vec_perm_1 (&dsecond);
39956 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
39957 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
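/* Editorial summary of the three-step construction above: DFIRST is
   an in-lane permutation that places each requested element either
   where it is wanted or at the mirror position in the other lane,
   DSECOND swaps the two 128-bit lanes (perm[i] = i ^ nelt2), and the
   final vblend* picks, per element, between the unswapped and swapped
   copies using MSK (a set mask bit selecting the swapped copy, per
   the blend insn convention).  */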
39961 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
39962 permutation using two vperm2f128, followed by a vshufpd insn blending
39963 the two vectors together. */
39966 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
39968 struct expand_vec_perm_d dfirst, dsecond, dthird;
39971 if (!TARGET_AVX || (d->vmode != V4DFmode))
39981 dfirst.perm[0] = (d->perm[0] & ~1);
39982 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
39983 dfirst.perm[2] = (d->perm[2] & ~1);
39984 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
39985 dsecond.perm[0] = (d->perm[1] & ~1);
39986 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
39987 dsecond.perm[2] = (d->perm[3] & ~1);
39988 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
39989 dthird.perm[0] = (d->perm[0] % 2);
39990 dthird.perm[1] = (d->perm[1] % 2) + 4;
39991 dthird.perm[2] = (d->perm[2] % 2) + 2;
39992 dthird.perm[3] = (d->perm[3] % 2) + 6;
39994 dfirst.target = gen_reg_rtx (dfirst.vmode);
39995 dsecond.target = gen_reg_rtx (dsecond.vmode);
39996 dthird.op0 = dfirst.target;
39997 dthird.op1 = dsecond.target;
39998 dthird.one_operand_p = false;
40000 canonicalize_perm (&dfirst);
40001 canonicalize_perm (&dsecond);
40003 ok = expand_vec_perm_1 (&dfirst)
40004 && expand_vec_perm_1 (&dsecond)
40005 && expand_vec_perm_1 (&dthird);
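/* Editorial worked example for the construction above: for
   d->perm == { 1 6 2 5 } we get dfirst.perm == { 0 1 2 3 },
   dsecond.perm == { 6 7 4 5 } (both then canonicalized to one-operand
   form) and dthird.perm == { 1 4 2 7 }.  The vshufpd step is legal
   because DTHIRD picks result element 0 from dfirst's pair { 0, 1 },
   element 1 from dsecond's pair { 0, 1 }, and likewise for the upper
   pairs.  */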
40012 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
40013 permutation with two pshufb insns and an ior. We should have already
40014 failed all two instruction sequences. */
40017 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
40019 rtx rperm[2][16], vperm, l, h, op, m128;
40020 unsigned int i, nelt, eltsz;
40022 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
40024 gcc_assert (!d->one_operand_p);
40027 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
40029 /* Generate two permutation masks. If the required element is within
40030 the given vector it is shuffled into the proper lane. If the required
40031 element is in the other vector, force a zero into the lane by setting
40032 bit 7 in the permutation mask. */
40033 m128 = GEN_INT (-128);
40034 for (i = 0; i < nelt; ++i)
40036 unsigned j, e = d->perm[i];
40037 unsigned which = (e >= nelt);
40041 for (j = 0; j < eltsz; ++j)
40043 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
40044 rperm[1-which][i*eltsz + j] = m128;
40048 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
40049 vperm = force_reg (V16QImode, vperm);
40051 l = gen_reg_rtx (V16QImode);
40052 op = gen_lowpart (V16QImode, d->op0);
40053 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
40055 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
40056 vperm = force_reg (V16QImode, vperm);
40058 h = gen_reg_rtx (V16QImode);
40059 op = gen_lowpart (V16QImode, d->op1);
40060 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
40062 op = gen_lowpart (V16QImode, d->target);
40063 emit_insn (gen_iorv16qi3 (op, l, h));
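/* Editorial example of the two masks built above (the reduction of E
   by NELT for second-operand elements happens in code elided here, so
   treat this as an assumption): for the V16QImode interleave
   { 0 16 1 17 ... 7 23 }, the op0 mask is { 0, -128, 1, -128, ...,
   7, -128 } and the op1 mask is { -128, 0, -128, 1, ..., -128, 7 };
   each pshufb thus contributes its own vector's bytes and zeros
   elsewhere, and the final por merges them.  */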
40068 /* Implement arbitrary permutation of one V32QImode and V16HImode operand
40069 with two vpshufb insns, vpermq and vpor. We should have already failed
40070 all two or three instruction sequences. */
40073 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
40075 rtx rperm[2][32], vperm, l, h, hp, op, m128;
40076 unsigned int i, nelt, eltsz;
40079 || !d->one_operand_p
40080 || (d->vmode != V32QImode && d->vmode != V16HImode))
40087 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
40089 /* Generate two permutation masks. If the required element is within
40090 the same lane, it is shuffled in. If the required element is from the
40091 other lane, force a zero by setting bit 7 in the permutation mask.
40092 The other mask has non-negative elements wherever an element is
40093 requested from the other lane; such elements are moved to the other lane,
40094 so that the result of vpshufb can have the two V2TImode halves
40096 m128 = GEN_INT (-128);
40097 for (i = 0; i < nelt; ++i)
40099 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
40100 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
40102 for (j = 0; j < eltsz; ++j)
40104 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
40105 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
40109 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
40110 vperm = force_reg (V32QImode, vperm);
40112 h = gen_reg_rtx (V32QImode);
40113 op = gen_lowpart (V32QImode, d->op0);
40114 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
40116 /* Swap the 128-bit lanes of h into hp. */
40117 hp = gen_reg_rtx (V4DImode);
40118 op = gen_lowpart (V4DImode, h);
40119 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
40122 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
40123 vperm = force_reg (V32QImode, vperm);
40125 l = gen_reg_rtx (V32QImode);
40126 op = gen_lowpart (V32QImode, d->op0);
40127 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
40129 op = gen_lowpart (V32QImode, d->target);
40130 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
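/* Editorial note on the index arithmetic above: WHICH is nonzero
   exactly when an element is requested from the other 128-bit lane
   ((d->perm[i] ^ i) & (nelt / 2)).  Those bytes are written into the
   H mask at the lane-mirrored position, so that after the vpermq
   above swaps the two lanes of H they arrive at their true
   destination; the final por combines them with the in-lane bytes
   produced through the L mask.  */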
40135 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
40136 and extract-odd permutations of two V32QImode and V16HImode operands
40137 with two vpshufb insns, vpor and vpermq. We should have already
40138 failed all two or three instruction sequences. */
40141 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
40143 rtx rperm[2][32], vperm, l, h, ior, op, m128;
40144 unsigned int i, nelt, eltsz;
40147 || d->one_operand_p
40148 || (d->vmode != V32QImode && d->vmode != V16HImode))
40151 for (i = 0; i < d->nelt; ++i)
40152 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
40159 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
40161 /* Generate two permutation masks. In the first permutation mask
40162 the first quarter will contain indexes for the first half
40163 of the op0, the second quarter will contain bit 7 set, third quarter
40164 will contain indexes for the second half of the op0 and the
40165 last quarter bit 7 set. In the second permutation mask
40166 the first quarter will contain bit 7 set, the second quarter
40167 indexes for the first half of the op1, the third quarter bit 7 set
40168 and last quarter indexes for the second half of the op1.
40169 I.e. the first mask e.g. for V32QImode extract even will be:
40170 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
40171 (all values masked with 0xf except for -128) and second mask
40172 for extract even will be
40173 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
40174 m128 = GEN_INT (-128);
40175 for (i = 0; i < nelt; ++i)
40177 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
40178 unsigned which = d->perm[i] >= nelt;
40179 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
40181 for (j = 0; j < eltsz; ++j)
40183 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
40184 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
40188 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
40189 vperm = force_reg (V32QImode, vperm);
40191 l = gen_reg_rtx (V32QImode);
40192 op = gen_lowpart (V32QImode, d->op0);
40193 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
40195 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
40196 vperm = force_reg (V32QImode, vperm);
40198 h = gen_reg_rtx (V32QImode);
40199 op = gen_lowpart (V32QImode, d->op1);
40200 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
40202 ior = gen_reg_rtx (V32QImode);
40203 emit_insn (gen_iorv32qi3 (ior, l, h));
40205 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
40206 op = gen_lowpart (V4DImode, d->target);
40207 ior = gen_lowpart (V4DImode, ior);
40208 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
40209 const1_rtx, GEN_INT (3)));
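/* Editorial note: after the por above, IOR holds the extracted
   elements as four 64-bit quarters in the order { op0-low, op1-low,
   op0-high, op1-high }; the { 0, 2, 1, 3 } vpermq places the two op0
   quarters and then the two op1 quarters next to each other, which is
   the desired extract-even (or extract-odd) result.  */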
40214 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
40215 and extract-odd permutations. */
40218 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
40225 t1 = gen_reg_rtx (V4DFmode);
40226 t2 = gen_reg_rtx (V4DFmode);
40228 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
40229 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
40230 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
40232 /* Now an unpck[lh]pd will produce the result required. */
40234 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
40236 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
40242 int mask = odd ? 0xdd : 0x88;
40244 t1 = gen_reg_rtx (V8SFmode);
40245 t2 = gen_reg_rtx (V8SFmode);
40246 t3 = gen_reg_rtx (V8SFmode);
40248 /* Shuffle within the 128-bit lanes to produce:
40249 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
40250 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
40253 /* Shuffle the lanes around to produce:
40254 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
40255 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
40258 /* Shuffle within the 128-bit lanes to produce:
40259 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
40260 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
40262 /* Shuffle within the 128-bit lanes to produce:
40263 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
40264 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
40266 /* Shuffle the lanes around to produce:
40267 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
40268 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
40277 /* These are always directly implementable by expand_vec_perm_1. */
40278 gcc_unreachable ();
40282 return expand_vec_perm_pshufb2 (d);
40285 /* We need 2*log2(N)-1 operations to achieve odd/even
40286 with interleave. */
40287 t1 = gen_reg_rtx (V8HImode);
40288 t2 = gen_reg_rtx (V8HImode);
40289 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
40290 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
40291 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
40292 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
40294 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
40296 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
40303 return expand_vec_perm_pshufb2 (d);
40306 t1 = gen_reg_rtx (V16QImode);
40307 t2 = gen_reg_rtx (V16QImode);
40308 t3 = gen_reg_rtx (V16QImode);
40309 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
40310 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
40311 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
40312 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
40313 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
40314 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
40316 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
40318 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
40325 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
40330 struct expand_vec_perm_d d_copy = *d;
40331 d_copy.vmode = V4DFmode;
40332 d_copy.target = gen_lowpart (V4DFmode, d->target);
40333 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
40334 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
40335 return expand_vec_perm_even_odd_1 (&d_copy, odd);
40338 t1 = gen_reg_rtx (V4DImode);
40339 t2 = gen_reg_rtx (V4DImode);
40341 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
40342 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
40343 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
40345 /* Now a vpunpck[lh]qdq will produce the result required. */
40347 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
40349 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
40356 struct expand_vec_perm_d d_copy = *d;
40357 d_copy.vmode = V8SFmode;
40358 d_copy.target = gen_lowpart (V8SFmode, d->target);
40359 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
40360 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
40361 return expand_vec_perm_even_odd_1 (&d_copy, odd);
40364 t1 = gen_reg_rtx (V8SImode);
40365 t2 = gen_reg_rtx (V8SImode);
40367 /* Shuffle the lanes around into
40368 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
40369 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
40370 gen_lowpart (V4DImode, d->op0),
40371 gen_lowpart (V4DImode, d->op1),
40373 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
40374 gen_lowpart (V4DImode, d->op0),
40375 gen_lowpart (V4DImode, d->op1),
40378 /* Swap the 2nd and 3rd position in each lane into
40379 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
40380 emit_insn (gen_avx2_pshufdv3 (t1, t1,
40381 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40382 emit_insn (gen_avx2_pshufdv3 (t2, t2,
40383 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40385 /* Now a vpunpck[lh]qdq will produce
40386 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
40388 t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
40389 gen_lowpart (V4DImode, t1),
40390 gen_lowpart (V4DImode, t2));
40392 t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
40393 gen_lowpart (V4DImode, t1),
40394 gen_lowpart (V4DImode, t2));
40399 gcc_unreachable ();
40405 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40406 extract-even and extract-odd permutations. */
40409 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
40411 unsigned i, odd, nelt = d->nelt;
40414 if (odd != 0 && odd != 1)
40417 for (i = 1; i < nelt; ++i)
40418 if (d->perm[i] != 2 * i + odd)
40421 return expand_vec_perm_even_odd_1 (d, odd);
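/* Editorial example: with nelt == 4 the permutations accepted here
   are { 0 2 4 6 } (odd == 0, extract-even) and { 1 3 5 7 } (odd == 1,
   extract-odd), i.e. alternate elements of the concatenation of the
   two operands.  */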
40424 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
40425 permutations. We assume that expand_vec_perm_1 has already failed. */
40428 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
40430 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
40431 enum machine_mode vmode = d->vmode;
40432 unsigned char perm2[4];
40440 /* These are special-cased in sse.md so that we can optionally
40441 use the vbroadcast instruction. They expand to two insns
40442 if the input happens to be in a register. */
40443 gcc_unreachable ();
40449 /* These are always implementable using standard shuffle patterns. */
40450 gcc_unreachable ();
40454 /* These can be implemented via interleave. We save one insn by
40455 stopping once we have promoted to V4SImode and then using pshufd. */
40459 rtx (*gen) (rtx, rtx, rtx)
40460 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
40461 : gen_vec_interleave_lowv8hi;
40465 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
40466 : gen_vec_interleave_highv8hi;
40471 dest = gen_reg_rtx (vmode);
40472 emit_insn (gen (dest, op0, op0));
40473 vmode = get_mode_wider_vector (vmode);
40474 op0 = gen_lowpart (vmode, dest);
40476 while (vmode != V4SImode);
40478 memset (perm2, elt, 4);
40479 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
40488 /* For AVX2 broadcasts of the first element vpbroadcast* or
40489 vpermq should be used by expand_vec_perm_1. */
40490 gcc_assert (!TARGET_AVX2 || d->perm[0]);
40494 gcc_unreachable ();
40498 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40499 broadcast permutations. */
40502 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
40504 unsigned i, elt, nelt = d->nelt;
40506 if (!d->one_operand_p)
40510 for (i = 1; i < nelt; ++i)
40511 if (d->perm[i] != elt)
40514 return expand_vec_perm_broadcast_1 (d);
40517 /* Implement arbitrary permutation of two V32QImode and V16HImode operands
40518 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
40519 all the shorter instruction sequences. */
40522 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
40524 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
40525 unsigned int i, nelt, eltsz;
40529 || d->one_operand_p
40530 || (d->vmode != V32QImode && d->vmode != V16HImode))
40537 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
40539 /* Generate 4 permutation masks. If the required element is within
40540 the same lane, it is shuffled in. If the required element is from the
40541 other lane, force a zero by setting bit 7 in the permutation mask.
40542 The other mask has non-negative elements wherever an element is
40543 requested from the other lane; such elements are moved to the other lane,
40544 so that the result of vpshufb can have the two V2TImode halves
40546 m128 = GEN_INT (-128);
40547 for (i = 0; i < 32; ++i)
40549 rperm[0][i] = m128;
40550 rperm[1][i] = m128;
40551 rperm[2][i] = m128;
40552 rperm[3][i] = m128;
40558 for (i = 0; i < nelt; ++i)
40560 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
40561 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
40562 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
40564 for (j = 0; j < eltsz; ++j)
40565 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
40566 used[which] = true;
40569 for (i = 0; i < 2; ++i)
40571 if (!used[2 * i + 1])
40576 vperm = gen_rtx_CONST_VECTOR (V32QImode,
40577 gen_rtvec_v (32, rperm[2 * i + 1]));
40578 vperm = force_reg (V32QImode, vperm);
40579 h[i] = gen_reg_rtx (V32QImode);
40580 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
40581 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
40584 /* Swap the 128-bit lanes of h[X]. */
40585 for (i = 0; i < 2; ++i)
40587 if (h[i] == NULL_RTX)
40589 op = gen_reg_rtx (V4DImode);
40590 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
40591 const2_rtx, GEN_INT (3), const0_rtx,
40593 h[i] = gen_lowpart (V32QImode, op);
40596 for (i = 0; i < 2; ++i)
40603 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
40604 vperm = force_reg (V32QImode, vperm);
40605 l[i] = gen_reg_rtx (V32QImode);
40606 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
40607 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
40610 for (i = 0; i < 2; ++i)
40614 op = gen_reg_rtx (V32QImode);
40615 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
40622 gcc_assert (l[0] && l[1]);
40623 op = gen_lowpart (V32QImode, d->target);
40624 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
40628 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
40629 With all of the interface bits taken care of, perform the expansion
40630 in D and return true on success. */
40633 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
40635 /* Try a single instruction expansion. */
40636 if (expand_vec_perm_1 (d))
40639 /* Try sequences of two instructions. */
40641 if (expand_vec_perm_pshuflw_pshufhw (d))
40644 if (expand_vec_perm_palignr (d))
40647 if (expand_vec_perm_interleave2 (d))
40650 if (expand_vec_perm_broadcast (d))
40653 if (expand_vec_perm_vpermq_perm_1 (d))
40656 if (expand_vec_perm_vperm2f128 (d))
40659 /* Try sequences of three instructions. */
40661 if (expand_vec_perm_2vperm2f128_vshuf (d))
40664 if (expand_vec_perm_pshufb2 (d))
40667 if (expand_vec_perm_interleave3 (d))
40670 if (expand_vec_perm_vperm2f128_vblend (d))
40673 /* Try sequences of four instructions. */
40675 if (expand_vec_perm_vpshufb2_vpermq (d))
40678 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
40681 /* ??? Look for narrow permutations whose element orderings would
40682 allow the promotion to a wider mode. */
40684 /* ??? Look for sequences of interleave or a wider permute that place
40685 the data into the correct lanes for a half-vector shuffle like
40686 pshuf[lh]w or vpermilps. */
40688 /* ??? Look for sequences of interleave that produce the desired results.
40689 The combinatorics of punpck[lh] get pretty ugly... */
40691 if (expand_vec_perm_even_odd (d))
40694 /* Even longer sequences. */
40695 if (expand_vec_perm_vpshufb4_vpermq2 (d))
40701 /* If a permutation only uses one operand, mark it as one_operand_p. Returns true
40702 if the permutation references both operands. */
40705 canonicalize_perm (struct expand_vec_perm_d *d)
40707 int i, which, nelt = d->nelt;
40709 for (i = which = 0; i < nelt; ++i)
40710 which |= (d->perm[i] < nelt ? 1 : 2);
40712 d->one_operand_p = true;
40719 if (!rtx_equal_p (d->op0, d->op1))
40721 d->one_operand_p = false;
40724 /* The elements of PERM do not suggest that only the first operand
40725 is used, but both operands are identical. Allow easier matching
40726 of the permutation by folding the permutation into the single operand. */
40731 for (i = 0; i < nelt; ++i)
40732 d->perm[i] &= nelt - 1;
40741 return (which == 3);
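/* Editorial example of the folding above: with nelt == 4,
   d->perm == { 0 5 2 7 } nominally uses both operands (WHICH == 3),
   but if OP0 and OP1 are the same register the masking with nelt - 1
   rewrites it to { 0 1 2 3 } and sets one_operand_p, exposing it to
   the cheaper single-input expanders.  */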
40745 ix86_expand_vec_perm_const (rtx operands[4])
40747 struct expand_vec_perm_d d;
40748 unsigned char perm[MAX_VECT_LEN];
40753 d.target = operands[0];
40754 d.op0 = operands[1];
40755 d.op1 = operands[2];
40758 d.vmode = GET_MODE (d.target);
40759 gcc_assert (VECTOR_MODE_P (d.vmode));
40760 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
40761 d.testing_p = false;
40763 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
40764 gcc_assert (XVECLEN (sel, 0) == nelt);
40765 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
40767 for (i = 0; i < nelt; ++i)
40769 rtx e = XVECEXP (sel, 0, i);
40770 int ei = INTVAL (e) & (2 * nelt - 1);
40775 two_args = canonicalize_perm (&d);
40777 if (ix86_expand_vec_perm_const_1 (&d))
40780 /* If the selector says both arguments are needed, but the operands are the
40781 same, the above tried to expand with one_operand_p and flattened selector.
40782 If that didn't work, retry without one_operand_p; we succeeded with that
40784 if (two_args && d.one_operand_p)
40786 d.one_operand_p = false;
40787 memcpy (d.perm, perm, sizeof (perm));
40788 return ix86_expand_vec_perm_const_1 (&d);
40794 /* Implement targetm.vectorize.vec_perm_const_ok. */
40797 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
40798 const unsigned char *sel)
40800 struct expand_vec_perm_d d;
40801 unsigned int i, nelt, which;
40805 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
40806 d.testing_p = true;
40808 /* Given sufficient ISA support we can just return true here
40809 for selected vector modes. */
40810 if (GET_MODE_SIZE (d.vmode) == 16)
40812 /* All implementable with a single vpperm insn. */
40815 /* All implementable with 2 pshufb + 1 ior. */
40818 /* All implementable with shufpd or unpck[lh]pd. */
40823 /* Extract the values from the vector CST into the permutation array in D. */
40825 memcpy (d.perm, sel, nelt);
40826 for (i = which = 0; i < nelt; ++i)
40828 unsigned char e = d.perm[i];
40829 gcc_assert (e < 2 * nelt);
40830 which |= (e < nelt ? 1 : 2);
40833 /* For all elements from second vector, fold the elements to first. */
40835 for (i = 0; i < nelt; ++i)
40838 /* Check whether the mask can be applied to the vector type. */
40839 d.one_operand_p = (which != 3);
40841 /* Implementable with shufps or pshufd. */
40842 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
40845 /* Otherwise we have to go through the motions and see if we can
40846 figure out how to generate the requested permutation. */
40847 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
40848 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
40849 if (!d.one_operand_p)
40850 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
40853 ret = ix86_expand_vec_perm_const_1 (&d);
40860 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
40862 struct expand_vec_perm_d d;
40868 d.vmode = GET_MODE (targ);
40869 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
40870 d.one_operand_p = false;
40871 d.testing_p = false;
40873 for (i = 0; i < nelt; ++i)
40874 d.perm[i] = i * 2 + odd;
40876 /* We'll either be able to implement the permutation directly... */
40877 if (expand_vec_perm_1 (&d))
40880 /* ... or we use the special-case patterns. */
40881 expand_vec_perm_even_odd_1 (&d, odd);
40885 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
40887 struct expand_vec_perm_d d;
40888 unsigned i, nelt, base;
40894 d.vmode = GET_MODE (targ);
40895 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
40896 d.one_operand_p = false;
40897 d.testing_p = false;
40899 base = high_p ? nelt / 2 : 0;
40900 for (i = 0; i < nelt / 2; ++i)
40902 d.perm[i * 2] = i + base;
40903 d.perm[i * 2 + 1] = i + base + nelt;
40906 /* Note that for AVX this isn't one instruction. */
40907 ok = ix86_expand_vec_perm_const_1 (&d);
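/* Editorial example: for nelt == 4 and !HIGH_P the permutation built
   above is { 0 4 1 5 }, the low-half interleave of the two inputs;
   with HIGH_P it is { 2 6 3 7 }.  */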
40912 /* Expand a vector operation CODE for a V*QImode in terms of the
40913 same operation on V*HImode. */
40916 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
40918 enum machine_mode qimode = GET_MODE (dest);
40919 enum machine_mode himode;
40920 rtx (*gen_il) (rtx, rtx, rtx);
40921 rtx (*gen_ih) (rtx, rtx, rtx);
40922 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
40923 struct expand_vec_perm_d d;
40924 bool ok, full_interleave;
40925 bool uns_p = false;
40932 gen_il = gen_vec_interleave_lowv16qi;
40933 gen_ih = gen_vec_interleave_highv16qi;
40936 himode = V16HImode;
40937 gen_il = gen_avx2_interleave_lowv32qi;
40938 gen_ih = gen_avx2_interleave_highv32qi;
40941 gcc_unreachable ();
40944 op2_l = op2_h = op2;
40948 /* Unpack data such that we've got a source byte in each low byte of
40949 each word. We don't care what goes into the high byte of each word.
40950 Rather than trying to get zero in there, most convenient is to let
40951 it be a copy of the low byte. */
40952 op2_l = gen_reg_rtx (qimode);
40953 op2_h = gen_reg_rtx (qimode);
40954 emit_insn (gen_il (op2_l, op2, op2));
40955 emit_insn (gen_ih (op2_h, op2, op2));
40958 op1_l = gen_reg_rtx (qimode);
40959 op1_h = gen_reg_rtx (qimode);
40960 emit_insn (gen_il (op1_l, op1, op1));
40961 emit_insn (gen_ih (op1_h, op1, op1));
40962 full_interleave = qimode == V16QImode;
40970 op1_l = gen_reg_rtx (himode);
40971 op1_h = gen_reg_rtx (himode);
40972 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
40973 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
40974 full_interleave = true;
40977 gcc_unreachable ();
40980 /* Perform the operation. */
40981 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
40983 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
40985 gcc_assert (res_l && res_h);
40987 /* Merge the data back into the right place. */
40989 d.op0 = gen_lowpart (qimode, res_l);
40990 d.op1 = gen_lowpart (qimode, res_h);
40992 d.nelt = GET_MODE_NUNITS (qimode);
40993 d.one_operand_p = false;
40994 d.testing_p = false;
40996 if (full_interleave)
40998 /* For SSE2, we used a full interleave, so the desired
40999 results are in the even elements. */
41000 for (i = 0; i < 32; ++i)
41005 /* For AVX, the interleave used above was not cross-lane. So the
41006 extraction takes the even elements, but with the second and third quarters swapped.
41007 Happily, that is even one insn shorter than a straight even extraction. */
41008 for (i = 0; i < 32; ++i)
41009 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
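/* Editorial check of the formula above: for i in 0..7 it yields the
   even bytes 0..14 of RES_L's low lane; for i in 8..15 ((i & 24) ==
   8) it adds 16 and selects RES_H's low lane; for i in 16..23 it
   subtracts 16, returning to RES_L's high lane; and for i in 24..31
   it selects RES_H's high lane, i.e. evens with the second and third
   quarters swapped.  */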
41012 ok = ix86_expand_vec_perm_const_1 (&d);
41015 set_unique_reg_note (get_last_insn (), REG_EQUAL,
41016 gen_rtx_fmt_ee (code, qimode, op1, op2));
41020 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
41021 bool uns_p, bool odd_p)
41023 enum machine_mode mode = GET_MODE (op1);
41024 enum machine_mode wmode = GET_MODE (dest);
41027 /* We only play even/odd games with vectors of SImode. */
41028 gcc_assert (mode == V4SImode || mode == V8SImode);
41030 /* If we're looking for the odd results, shift those members down to
41031 the even slots. For some CPUs this is faster than a PSHUFD. */
41034 /* For XOP use vpmacsdqh, but only for smult, as it is only signed. */
41036 if (TARGET_XOP && mode == V4SImode && !uns_p)
41038 x = force_reg (wmode, CONST0_RTX (wmode));
41039 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
41043 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
41044 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
41045 x, NULL, 1, OPTAB_DIRECT);
41046 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
41047 x, NULL, 1, OPTAB_DIRECT);
41048 op1 = gen_lowpart (mode, op1);
41049 op2 = gen_lowpart (mode, op2);
41052 if (mode == V8SImode)
41055 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
41057 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
41060 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
41061 else if (TARGET_SSE4_1)
41062 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
41065 rtx s1, s2, t0, t1, t2;
41067 /* The easiest way to implement this without PMULDQ is to go through
41068 the motions as if we are performing a full 64-bit multiply, except
41069 that we need to do less shuffling of the elements. */
41071 /* Compute the sign-extension, aka highparts, of the two operands. */
41072 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
41073 op1, pc_rtx, pc_rtx);
41074 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
41075 op2, pc_rtx, pc_rtx);
41077 /* Multiply LO(A) * HI(B), and vice-versa. */
41078 t1 = gen_reg_rtx (wmode);
41079 t2 = gen_reg_rtx (wmode);
41080 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
41081 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
41083 /* Multiply LO(A) * LO(B). */
41084 t0 = gen_reg_rtx (wmode);
41085 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
41087 /* Combine and shift the highparts into place. */
41088 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
41089 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
41092 /* Combine high and low parts. */
41093 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
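/* Editorial note on the identity used above (arithmetic mod 2^64):

     (int64) a * (int64) b
       = (uint64) a * (uint64) b
	 - (((a < 0 ? (uint64) b : 0) + (b < 0 ? (uint64) a : 0)) << 32)

   S1 and S2 are all-ones masks where an operand is negative, so the
   unsigned widening products T1 and T2 equal (2^32 - 1) * b resp.
   (2^32 - 1) * a there; after the 32-bit left shift only their low
   words survive, which is exactly the subtraction above.  T0 carries
   the plain unsigned product.  */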
41100 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
41101 bool uns_p, bool high_p)
41103 enum machine_mode wmode = GET_MODE (dest);
41104 enum machine_mode mode = GET_MODE (op1);
41105 rtx t1, t2, t3, t4, mask;
41110 t1 = gen_reg_rtx (mode);
41111 t2 = gen_reg_rtx (mode);
41112 if (TARGET_XOP && !uns_p)
41114 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
41115 shuffle the elements once so that all elements are in the right
41116 place for immediate use: { A C B D }. */
41117 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
41118 const1_rtx, GEN_INT (3)));
41119 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
41120 const1_rtx, GEN_INT (3)));
41124 /* Put the elements into place for the multiply. */
41125 ix86_expand_vec_interleave (t1, op1, op1, high_p);
41126 ix86_expand_vec_interleave (t2, op2, op2, high_p);
41129 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
41133 /* Shuffle the elements between the lanes. After this we
41134 have { A B E F | C D G H } for each operand. */
41135 t1 = gen_reg_rtx (V4DImode);
41136 t2 = gen_reg_rtx (V4DImode);
41137 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
41138 const0_rtx, const2_rtx,
41139 const1_rtx, GEN_INT (3)));
41140 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
41141 const0_rtx, const2_rtx,
41142 const1_rtx, GEN_INT (3)));
41144 /* Shuffle the elements within the lanes. After this we
41145 have { A A B B | C C D D } or { E E F F | G G H H }. */
41146 t3 = gen_reg_rtx (V8SImode);
41147 t4 = gen_reg_rtx (V8SImode);
41148 mask = GEN_INT (high_p
41149 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
41150 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
41151 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
41152 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
41154 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
41159 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
41160 uns_p, OPTAB_DIRECT);
41161 t2 = expand_binop (mode,
41162 uns_p ? umul_highpart_optab : smul_highpart_optab,
41163 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
41164 gcc_assert (t1 && t2);
41166 ix86_expand_vec_interleave (gen_lowpart (mode, dest), t1, t2, high_p);
41171 t1 = gen_reg_rtx (wmode);
41172 t2 = gen_reg_rtx (wmode);
41173 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
41174 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
41176 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
41180 gcc_unreachable ();
41185 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
41189 res_1 = gen_reg_rtx (V4SImode);
41190 res_2 = gen_reg_rtx (V4SImode);
41191 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_1),
41192 op1, op2, true, false);
41193 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_2),
41194 op1, op2, true, true);
41196 /* Move the results in element 2 down to element 1; we don't care
41197 what goes in elements 2 and 3. Then we can merge the parts
41198 back together with an interleave.
41200 Note that two other sequences were tried:
41201 (1) Use interleaves at the start instead of psrldq, which allows
41202 us to use a single shufps to merge things back at the end.
41203 (2) Use shufps here to combine the two vectors, then pshufd to
41204 put the elements in the correct order.
41205 In both cases the cost of the reformatting stall was too high
41206 and the overall sequence slower. */
41208 emit_insn (gen_sse2_pshufd_1 (res_1, res_1, const0_rtx, const2_rtx,
41209 const0_rtx, const0_rtx));
41210 emit_insn (gen_sse2_pshufd_1 (res_2, res_2, const0_rtx, const2_rtx,
41211 const0_rtx, const0_rtx));
41212 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
41214 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
41218 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
41220 enum machine_mode mode = GET_MODE (op0);
41221 rtx t1, t2, t3, t4, t5, t6;
41223 if (TARGET_XOP && mode == V2DImode)
41225 /* op1: A,B,C,D, op2: E,F,G,H */
41226 op1 = gen_lowpart (V4SImode, op1);
41227 op2 = gen_lowpart (V4SImode, op2);
41229 t1 = gen_reg_rtx (V4SImode);
41230 t2 = gen_reg_rtx (V4SImode);
41231 t3 = gen_reg_rtx (V2DImode);
41232 t4 = gen_reg_rtx (V2DImode);
41235 emit_insn (gen_sse2_pshufd_1 (t1, op1,
41241 /* t2: (B*E),(A*F),(D*G),(C*H) */
41242 emit_insn (gen_mulv4si3 (t2, t1, op2));
41244 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
41245 emit_insn (gen_xop_phadddq (t3, t2));
41247 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
41248 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
41250 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
41251 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
41255 enum machine_mode nmode;
41256 rtx (*umul) (rtx, rtx, rtx);
41258 if (mode == V2DImode)
41260 umul = gen_vec_widen_umult_even_v4si;
41263 else if (mode == V4DImode)
41265 umul = gen_vec_widen_umult_even_v8si;
41269 gcc_unreachable ();
41272 /* Multiply low parts. */
41273 t1 = gen_reg_rtx (mode);
41274 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
41276 /* Shift input vectors right 32 bits so we can multiply high parts. */
41278 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
41279 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
41281 /* Multiply high parts by low parts. */
41282 t4 = gen_reg_rtx (mode);
41283 t5 = gen_reg_rtx (mode);
41284 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
41285 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
41287 /* Combine and shift the highparts back. */
41288 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
41289 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
41291 /* Combine high and low parts. */
41292 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
41295 set_unique_reg_note (get_last_insn (), REG_EQUAL,
41296 gen_rtx_MULT (mode, op1, op2));
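/* Editorial note on the generic path above: writing each 64-bit
   element as hi * 2^32 + lo, the product mod 2^64 is

     lo1 * lo2 + ((hi1 * lo2 + hi2 * lo1) << 32)

   T1 is the low-by-low widening product, T4 and T5 are the two
   high-by-low cross products (formed after shifting the inputs right
   by 32 bits), and the final adds and shift assemble the result.  */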
41299 /* Expand an insert into a vector register through pinsr insn.
41300 Return true if successful. */
41303 ix86_expand_pinsr (rtx *operands)
41305 rtx dst = operands[0];
41306 rtx src = operands[3];
41308 unsigned int size = INTVAL (operands[1]);
41309 unsigned int pos = INTVAL (operands[2]);
41311 if (GET_CODE (dst) == SUBREG)
41313 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
41314 dst = SUBREG_REG (dst);
41317 if (GET_CODE (src) == SUBREG)
41318 src = SUBREG_REG (src);
41320 switch (GET_MODE (dst))
41327 enum machine_mode srcmode, dstmode;
41328 rtx (*pinsr)(rtx, rtx, rtx, rtx);
41330 srcmode = mode_for_size (size, MODE_INT, 0);
41335 if (!TARGET_SSE4_1)
41337 dstmode = V16QImode;
41338 pinsr = gen_sse4_1_pinsrb;
41344 dstmode = V8HImode;
41345 pinsr = gen_sse2_pinsrw;
41349 if (!TARGET_SSE4_1)
41351 dstmode = V4SImode;
41352 pinsr = gen_sse4_1_pinsrd;
41356 gcc_assert (TARGET_64BIT);
41357 if (!TARGET_SSE4_1)
41359 dstmode = V2DImode;
41360 pinsr = gen_sse4_1_pinsrq;
41367 dst = gen_lowpart (dstmode, dst);
41368 src = gen_lowpart (srcmode, src);
41372 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
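/* Editorial example for the expansion above (the scaling of POS down
   to an element index happens in code elided here, so treat the
   numbers as an assumption): inserting a 16-bit value at bit offset
   48 of a DImode destination held in an SSE register selects V8HImode
   and pinsrw with element index 48 / 16 == 3, i.e. a 1 << 3 selector
   operand.  */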
41381 /* This function returns the calling-ABI-specific va_list type node,
41382 i.e. the va_list type that matches FNDECL's calling convention. */
41385 ix86_fn_abi_va_list (tree fndecl)
41388 return va_list_type_node;
41389 gcc_assert (fndecl != NULL_TREE);
41391 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
41392 return ms_va_list_type_node;
41394 return sysv_va_list_type_node;
41397 /* Returns the canonical va_list type specified by TYPE. If there
41398 is no valid TYPE provided, it returns NULL_TREE. */
41401 ix86_canonical_va_list_type (tree type)
41405 /* Resolve references and pointers to va_list type. */
41406 if (TREE_CODE (type) == MEM_REF)
41407 type = TREE_TYPE (type);
41408 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
41409 type = TREE_TYPE (type);
41410 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
41411 type = TREE_TYPE (type);
41413 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
41415 wtype = va_list_type_node;
41416 gcc_assert (wtype != NULL_TREE);
41418 if (TREE_CODE (wtype) == ARRAY_TYPE)
41420 /* If va_list is an array type, the argument may have decayed
41421 to a pointer type, e.g. by being passed to another function.
41422 In that case, unwrap both types so that we can compare the
41423 underlying records. */
41424 if (TREE_CODE (htype) == ARRAY_TYPE
41425 || POINTER_TYPE_P (htype))
41427 wtype = TREE_TYPE (wtype);
41428 htype = TREE_TYPE (htype);
41431 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
41432 return va_list_type_node;
41433 wtype = sysv_va_list_type_node;
41434 gcc_assert (wtype != NULL_TREE);
41436 if (TREE_CODE (wtype) == ARRAY_TYPE)
41438 /* If va_list is an array type, the argument may have decayed
41439 to a pointer type, e.g. by being passed to another function.
41440 In that case, unwrap both types so that we can compare the
41441 underlying records. */
41442 if (TREE_CODE (htype) == ARRAY_TYPE
41443 || POINTER_TYPE_P (htype))
41445 wtype = TREE_TYPE (wtype);
41446 htype = TREE_TYPE (htype);
41449 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
41450 return sysv_va_list_type_node;
41451 wtype = ms_va_list_type_node;
41452 gcc_assert (wtype != NULL_TREE);
41454 if (TREE_CODE (wtype) == ARRAY_TYPE)
41456 /* If va_list is an array type, the argument may have decayed
41457 to a pointer type, e.g. by being passed to another function.
41458 In that case, unwrap both types so that we can compare the
41459 underlying records. */
41460 if (TREE_CODE (htype) == ARRAY_TYPE
41461 || POINTER_TYPE_P (htype))
41463 wtype = TREE_TYPE (wtype);
41464 htype = TREE_TYPE (htype);
41467 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
41468 return ms_va_list_type_node;
41471 return std_canonical_va_list_type (type);
41474 /* Iterate through the target-specific builtin types for va_list.
41475 IDX denotes the iterator, *PTREE is set to the result type of
41476 the va_list builtin, and *PNAME to its internal name.
41477 Returns zero if there is no element for this index, otherwise
41478 IDX should be increased upon the next call.
41479 Note: do not iterate over a base builtin's name like __builtin_va_list.
41480 Used from c_common_nodes_and_builtins. */
41483 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
41493 *ptree = ms_va_list_type_node;
41494 *pname = "__builtin_ms_va_list";
41498 *ptree = sysv_va_list_type_node;
41499 *pname = "__builtin_sysv_va_list";
41507 #undef TARGET_SCHED_DISPATCH
41508 #define TARGET_SCHED_DISPATCH has_dispatch
41509 #undef TARGET_SCHED_DISPATCH_DO
41510 #define TARGET_SCHED_DISPATCH_DO do_dispatch
41511 #undef TARGET_SCHED_REASSOCIATION_WIDTH
41512 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
41513 #undef TARGET_SCHED_REORDER
41514 #define TARGET_SCHED_REORDER ix86_sched_reorder
41515 #undef TARGET_SCHED_ADJUST_PRIORITY
41516 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
41517 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
41518 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
41519 ix86_dependencies_evaluation_hook
41521 /* The size of the dispatch window is the total number of bytes of
41522 object code allowed in a window. */
41523 #define DISPATCH_WINDOW_SIZE 16
41525 /* Number of dispatch windows considered for scheduling. */
41526 #define MAX_DISPATCH_WINDOWS 3
41528 /* Maximum number of instructions in a window. */
41531 /* Maximum number of immediate operands in a window. */
41534 /* Maximum number of immediate bits allowed in a window. */
41535 #define MAX_IMM_SIZE 128
41537 /* Maximum number of 32 bit immediates allowed in a window. */
41538 #define MAX_IMM_32 4
41540 /* Maximum number of 64 bit immediates allowed in a window. */
41541 #define MAX_IMM_64 2
41543 /* Maximum total of loads or prefetches allowed in a window. */
41546 /* Maximum total of stores allowed in a window. */
41547 #define MAX_STORE 1
41553 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
41554 enum dispatch_group {
41569 /* Number of allowable groups in a dispatch window. It is an array
41570 indexed by dispatch_group enum. 100 is used as a big number,
41571 because the number of these kinds of operations does not have any
41572 effect in a dispatch window, but we need them for other reasons in
41574 static unsigned int num_allowable_groups[disp_last] = {
41575 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
41578 char group_name[disp_last + 1][16] = {
41579 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
41580 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
41581 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
41584 /* Instruction path. */
41587 path_single, /* Single micro op. */
41588 path_double, /* Double micro op. */
41589 path_multi, /* Instructions with more than 2 micro ops. */
41593 /* sched_insn_info defines a window onto the instructions scheduled in
41594 the basic block. It contains a pointer to the insn_info table and
41595 the instruction scheduled.
41597 Windows are allocated for each basic block and are linked
41599 typedef struct sched_insn_info_s {
41601 enum dispatch_group group;
41602 enum insn_path path;
41607 /* Linked list of dispatch windows. This is a two-way list of
41608 dispatch windows of a basic block. It contains information about
41609 the number of uops in the window and the total number of
41610 instructions and of bytes in the object code for this dispatch
41612 typedef struct dispatch_windows_s {
41613 int num_insn; /* Number of insns in the window. */
41614 int num_uops; /* Number of uops in the window. */
41615 int window_size; /* Number of bytes in the window. */
41616 int window_num; /* Window number, 0 or 1. */
41617 int num_imm; /* Number of immediates in an insn. */
41618 int num_imm_32; /* Number of 32 bit immediates in an insn. */
41619 int num_imm_64; /* Number of 64 bit immediates in an insn. */
41620 int imm_size; /* Total immediates in the window. */
41621 int num_loads; /* Total memory loads in the window. */
41622 int num_stores; /* Total memory stores in the window. */
41623 int violation; /* Violation exists in window. */
41624 sched_insn_info *window; /* Pointer to the window. */
41625 struct dispatch_windows_s *next;
41626 struct dispatch_windows_s *prev;
41627 } dispatch_windows;
41629 /* Immediate values used in an insn. */
41630 typedef struct imm_info_s
41637 static dispatch_windows *dispatch_window_list;
41638 static dispatch_windows *dispatch_window_list1;
41640 /* Get dispatch group of insn. */
41642 static enum dispatch_group
41643 get_mem_group (rtx insn)
41645 enum attr_memory memory;
41647 if (INSN_CODE (insn) < 0)
41648 return disp_no_group;
41649 memory = get_attr_memory (insn);
41650 if (memory == MEMORY_STORE)
41653 if (memory == MEMORY_LOAD)
41656 if (memory == MEMORY_BOTH)
41657 return disp_load_store;
41659 return disp_no_group;
41662 /* Return true if insn is a compare instruction. */
41667 enum attr_type type;
41669 type = get_attr_type (insn);
41670 return (type == TYPE_TEST
41671 || type == TYPE_ICMP
41672 || type == TYPE_FCMP
41673 || GET_CODE (PATTERN (insn)) == COMPARE);
41676 /* Return true if a dispatch violation was encountered. */
41679 dispatch_violation (void)
41681 if (dispatch_window_list->next)
41682 return dispatch_window_list->next->violation;
41683 return dispatch_window_list->violation;
41686 /* Return true if insn is a branch instruction. */
41689 is_branch (rtx insn)
41691 return (CALL_P (insn) || JUMP_P (insn));
41694 /* Return true if insn is a prefetch instruction. */
41697 is_prefetch (rtx insn)
41699 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
41702 /* This function initializes a dispatch window and the list container holding a
41703 pointer to the window. */
41706 init_window (int window_num)
41709 dispatch_windows *new_list;
41711 if (window_num == 0)
41712 new_list = dispatch_window_list;
41714 new_list = dispatch_window_list1;
41716 new_list->num_insn = 0;
41717 new_list->num_uops = 0;
41718 new_list->window_size = 0;
41719 new_list->next = NULL;
41720 new_list->prev = NULL;
41721 new_list->window_num = window_num;
41722 new_list->num_imm = 0;
41723 new_list->num_imm_32 = 0;
41724 new_list->num_imm_64 = 0;
41725 new_list->imm_size = 0;
41726 new_list->num_loads = 0;
41727 new_list->num_stores = 0;
41728 new_list->violation = false;
41730 for (i = 0; i < MAX_INSN; i++)
41732 new_list->window[i].insn = NULL;
41733 new_list->window[i].group = disp_no_group;
41734 new_list->window[i].path = no_path;
41735 new_list->window[i].byte_len = 0;
41736 new_list->window[i].imm_bytes = 0;
41741 /* This function allocates and initializes a dispatch window and the
41742 list container holding a pointer to the window. */
41744 static dispatch_windows *
41745 allocate_window (void)
41747 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
41748 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
41753 /* This routine initializes the dispatch scheduling information. It
41754 initiates building dispatch scheduler tables and constructs the
41755 first dispatch window. */
41758 init_dispatch_sched (void)
41760 /* Allocate a dispatch list and a window. */
41761 dispatch_window_list = allocate_window ();
41762 dispatch_window_list1 = allocate_window ();
41767 /* This function returns true if a branch is detected. End of a basic block
41768 does not have to be a branch, but here we assume only branches end a basic block. */
41772 is_end_basic_block (enum dispatch_group group)
41774 return group == disp_branch;
41777 /* This function is called when the end of a window processing is reached. */
41780 process_end_window (void)
41782 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
41783 if (dispatch_window_list->next)
41785 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
41786 gcc_assert (dispatch_window_list->window_size
41787 + dispatch_window_list1->window_size <= 48);
41793 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
41794 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
41795 for 48 bytes of instructions. Note that these windows are not dispatch
41796 windows of size DISPATCH_WINDOW_SIZE. */
41798 static dispatch_windows *
41799 allocate_next_window (int window_num)
41801 if (window_num == 0)
41803 if (dispatch_window_list->next)
41806 return dispatch_window_list;
41809 dispatch_window_list->next = dispatch_window_list1;
41810 dispatch_window_list1->prev = dispatch_window_list;
41812 return dispatch_window_list1;
41815 /* Increment the number of immediate operands of an instruction. */
41818 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
41823 switch ( GET_CODE (*in_rtx))
41828 (imm_values->imm)++;
41829 if (x86_64_immediate_operand (*in_rtx, SImode))
41830 (imm_values->imm32)++;
41832 (imm_values->imm64)++;
41836 (imm_values->imm)++;
41837 (imm_values->imm64)++;
41841 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
41843 (imm_values->imm)++;
41844 (imm_values->imm32)++;
41855 /* Compute number of immediate operands of an instruction. */
41858 find_constant (rtx in_rtx, imm_info *imm_values)
41860 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
41861 (rtx_function) find_constant_1, (void *) imm_values);
41864 /* Return total size of immediate operands of an instruction along with number
41865 of corresponding immediate operands. It initializes its parameters to zero
41866 before calling FIND_CONSTANT.
41867 INSN is the input instruction. IMM is the total of immediates.
41868 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
41872 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
41874 imm_info imm_values = {0, 0, 0};
41876 find_constant (insn, &imm_values);
41877 *imm = imm_values.imm;
41878 *imm32 = imm_values.imm32;
41879 *imm64 = imm_values.imm64;
41880 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
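/* Editorial example: an insn carrying two 32-bit immediates and one
   64-bit immediate yields *IMM == 3, *IMM32 == 2, *IMM64 == 1 and a
   return value of 2 * 4 + 1 * 8 == 16 bytes.  */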
41883 /* This function indicates if an operand of an instruction is an immediate. */
41887 has_immediate (rtx insn)
41889 int num_imm_operand;
41890 int num_imm32_operand;
41891 int num_imm64_operand;
41894 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
41895 &num_imm64_operand);
41899 /* Return single or double path for instructions. */
41901 static enum insn_path
41902 get_insn_path (rtx insn)
41904 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
41906 if ((int)path == 0)
41907 return path_single;
41909 if ((int)path == 1)
41910 return path_double;
41915 /* Return insn dispatch group. */
41917 static enum dispatch_group
41918 get_insn_group (rtx insn)
41920 enum dispatch_group group = get_mem_group (insn);
41924 if (is_branch (insn))
41925 return disp_branch;
41930 if (has_immediate (insn))
41933 if (is_prefetch (insn))
41934 return disp_prefetch;
41936 return disp_no_group;
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}

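/* Illustrative note (not in the original source): assuming the
   per-window limits defined earlier in this file (e.g. a MAX_IMM_64
   of 2), a window that already holds two 64-bit immediates makes
   count_num_restricted return BIG for a further 64-bit-immediate
   insn; BIG then exceeds num_allowable_groups[disp_imm] in
   fits_dispatch_window below, so the insn is pushed to a new
   window.  */
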
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}

/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}

/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}

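/* Illustrative note (not in the original source): the expression
   window_num = ~window_num & 1 above toggles 0 -> 1 -> 0, so
   dispatch alternates between the two windows (each assumed to be
   DISPATCH_WINDOW_SIZE bytes, hence the 32- and 48-byte thresholds)
   until process_end_window starts a fresh pair.  */
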
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *) list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}

/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}

/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}

/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}

/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}

/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}

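/* Illustrative note (not in the original source): the
   flag_dispatch_scheduler variable corresponds to the
   -mdispatch-scheduler command-line option, so the dispatch window
   machinery above is only exercised when compiling with
   -march=bdver1, bdver2 or bdver3 together with that option.  */
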
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
                          enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}

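/* Illustrative note (not in the original source): with a
   reassociation width of 2, the reassoc pass may rewrite a summation
   chain such as

     t = ((a + b) + c) + d;

   into the shape

     t = (a + b) + (c + d);

   where the two inner additions are independent and can issue in the
   same cycle on a 2-wide machine such as Atom.  */
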
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}

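/* Illustrative note (not in the original source): for a loop like

     void f (int *a, int *b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] += b[i];
     }

   the vectorizer queries the preferred mode for SImode elements:
   under -mavx without -mprefer-avx128 the hook answers V8SImode
   (256-bit registers), while plain SSE targets get V4SImode.  */
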
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}

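/* Illustrative note (not in the original source): the returned value
   is a bitmask of candidate vector sizes in bytes, so 32 | 16 asks
   the vectorizer to try both 256-bit and 128-bit vectors, while 0
   means only the size of the preferred SIMD mode is used.  */
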
/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
{
  if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}

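/* Illustrative note (not in the original source): this allows the
   register allocator to spill an SImode (or, on 64-bit targets, a
   DImode) integer pseudo into an SSE register rather than a stack
   slot, avoiding a memory round trip on processors whose tuning sets
   TARGET_GENERAL_REGS_SSE_SPILL.  */
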
/* Implement targetm.vectorize.init_cost.  */

static void *
ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
{
  unsigned *cost = XNEWVEC (unsigned, 3);
  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
  return cost;
}

/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                    struct _stmt_vec_info *stmt_info, int misalign,
                    enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}

/* Implement targetm.vectorize.finish_cost.  */

static void
ix86_finish_cost (void *data, unsigned *prologue_cost,
                  unsigned *body_cost, unsigned *epilogue_cost)
{
  unsigned *cost = (unsigned *) data;

  *prologue_cost = cost[vect_prologue];
  *body_cost = cost[vect_body];
  *epilogue_cost = cost[vect_epilogue];
}

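/* Illustrative note (not in the original source): the vectorizer
   drives these hooks as a simple lifecycle, roughly

     void *data = targetm.vectorize.init_cost (loop);
     targetm.vectorize.add_stmt_cost (data, 1, kind, stmt_info,
                                      misalign, vect_body);
     targetm.vectorize.finish_cost (data, &prologue, &body, &epilogue);
     targetm.vectorize.destroy_cost_data (data);

   and compares the accumulated totals against the scalar cost to
   decide whether vectorization is profitable.  */
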
/* Implement targetm.vectorize.destroy_cost_data.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}

/* Validate target specific memory model bits in VAL.  */

static unsigned HOST_WIDE_INT
ix86_memmodel_check (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
  bool strong;

  if (val & ~(unsigned HOST_WIDE_INT) (IX86_HLE_ACQUIRE | IX86_HLE_RELEASE
                                       | MEMMODEL_MASK)
      || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
    {
      warning (OPT_Winvalid_memory_model,
               "Unknown architecture specific memory model");
      return MEMMODEL_SEQ_CST;
    }

  strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
  if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
    {
      warning (OPT_Winvalid_memory_model,
               "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
    }
  if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
    {
      warning (OPT_Winvalid_memory_model,
               "HLE_RELEASE not used with RELEASE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
    }
  return val;
}

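/* Illustrative note (not in the original source): user code reaches
   this check through the HLE bits accepted by the __atomic builtins,
   for example

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     __atomic_store_n (&lock, 0,
                       __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Pairing __ATOMIC_HLE_ACQUIRE with a model weaker than ACQUIRE
   triggers the corresponding warning above.  */
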
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"